fixed language linking bug left from elimination of globals
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor: every new parser starts with a clean per-parse state.
#
# __construct() is the name PHP 5 and later look for; PHP 8 no longer treats
# a method named after the class as a constructor, so without it the state
# would stay uninitialised until the first clearState() call.
function __construct()
{
	$this->clearState();
}

# PHP 4 style constructor, kept for PHP 4 and for code that calls it
# explicitly. It only delegates to __construct().
function Parser()
{
	$this->__construct();
}
32
# Puts every per-parse member back to its initial value.
# parse() calls this at the start of a run unless asked not to.
function clearState()
{
	# Block-level bookkeeping used while building paragraphs and lists
	$this->mLastSection = "";
	$this->mDTopen = false;

	# Running number handed out to auto-numbered external links
	$this->mAutonumber = 0;

	# Filled in by strip(), read back by unstrip()
	$this->mStripState = false;

	# Fresh result object for the next parse
	$this->mOutput = new ParserOutput();
}
41
# Main entry point: converts wikitext to HTML.
#
# strip() first swaps <nowiki>, <math> and <pre> sections for placeholders,
# doWikiPass2() then does all the real work on the remaining text, and
# unstrip() finally puts the protected sections back.
#
# $text        wikitext to convert
# $title       title object of the page being parsed (stored by reference)
# $options     options object that the other methods query for settings
# $linestart   handed on to doBlockLevels(); when false the first line of
#              $text is not treated as the start of a line
# $clearState  pass false to keep the state left by a previous call
#
# Returns a ParserOutput
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# The strip state lives in $this->mStripState so that unstrip() sees
	# exactly what strip() recorded.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Builds a random lowercase hexadecimal token from two mt_rand() draws.
# strip() uses such tokens as its placeholder markers.
/* static */ function getRandomString()
{
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
# Replaces every <nowiki>, <math> (only when TeX is enabled in the options)
# and <pre> section of $text with a unique placeholder, in that order.
#
# $state is overwritten with the marker strings, the section counters and
# the replacement texts that unstrip() needs to put the sections back.
# With $render true the stored replacement is rendered output; otherwise the
# original tag and its content are stored unchanged.
#
# Returns the text with the placeholders in place.
#
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	# Replace any instances of the placeholders already present in the text
	foreach ( array( 'nwunq', 'mathunq', 'preunq' ) as $key ) {
		$text = str_replace( $state[$key], wfHtmlEscapeFirst( $state[$key] ), $text );
	}

	$text = $this->stripTagSections( $text, $state, 'nowiki', 'nw', $render );
	if ( $this->mOptions->getUseTeX() ) {
		$text = $this->stripTagSections( $text, $state, 'math', 'math', $render );
	}
	return $this->stripTagSections( $text, $state, 'pre', 'pre', $render );
}

# Helper for strip(): replaces all <$tag>...</$tag> sections of $text with
# numbered placeholders and records their replacements in $state under the
# keys "{$prefix}list", "{$prefix}secs" and "{$prefix}unq".
#
/* private */ function stripTagSections( $text, &$state, $tag, $prefix, $render )
{
	$openTag = "/<\\s*{$tag}\\s*>/i";
	$closeTag = "/<\\/\\s*{$tag}\\s*>/i";
	$listKey = $prefix . 'list';
	$countKey = $prefix . 'secs';
	$markerKey = $prefix . 'unq';

	$out = "";
	while ( "" != $text ) {
		$p = preg_split( $openTag, $text, 2 );
		$out .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No further opening tag, or nothing follows it
			break;
		}

		$q = preg_split( $closeTag, $p[1], 2 );
		$n = ++$state[$countKey];

		if ( !$render ) {
			$replacement = "<{$tag}>{$q[0]}</{$tag}>";
		} else if ( 'math' == $tag ) {
			$replacement = renderMath( $q[0] );
		} else if ( 'pre' == $tag ) {
			$replacement = "<pre>" . wfEscapeHTMLTagsOnly( $q[0] ) . "</pre>\n";
		} else {
			$replacement = wfEscapeHTMLTagsOnly( $q[0] );
		}
		$state[$listKey][$n] = $replacement;

		$out .= $state[$markerKey] . sprintf( "%08X", $n );

		# Without a closing tag the section runs to the end of the text, so
		# there is no remainder. Checking explicitly avoids reading an
		# undefined offset.
		$text = isset( $q[1] ) ? $q[1] : "";
	}
	return $out;
}
165
# Puts back the sections that strip() replaced with placeholders.
# <pre> sections are restored first, then <math>, then <nowiki>.
#
# $text   text containing placeholders
# $state  the array filled in by strip()
#
# Returns the text with every placeholder replaced by its stored section.
#
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		$total = $state[$kind . 'secs'];
		$marker = $state[$kind . 'unq'];
		# Sections are numbered from 1
		for ( $n = 1; $n <= $total; $n++ ) {
			$placeholder = $marker . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
181
# Builds the listing appended to a category page: linked subcategories
# followed by linked member articles.
#
# Returns nothing when category magic is switched off in the options, ""
# when the text of the current title does not start with the localised
# category word, and otherwise the HTML for the listing.
#
function categoryMagic ()
{
	global $wgLang , $wgUser ;

	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ;
	}

	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst( wfMsg( "category" ) ) ;
	$ti = explode( ":" , $this->mTitle->getText() , 2 ) ;
	if ( $cat != $ti[0] ) {
		return "" ;
	}

	$r = "<br break=all>\n" ;
	$articles = array() ;
	$children = array() ;

	$sk =& $wgUser->getSkin() ;

	# The existence flag is hard-wired to false, so only the brokenlinks
	# query is ever issued.
	$doesexist = false ;
	$sql = $doesexist
		? "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id"
		: "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;

	$res = wfQuery( $sql, DB_READ ) ;
	while ( $row = wfFetchObject( $res ) ) {
		# Rebuild the prefixed title of the linking page
		$name = $wgLang->getNsText( $row->cur_namespace ) ;
		if ( $name != "" ) {
			$name .= ":" ;
		}
		$name .= $row->cur_title ;

		# A page whose own title starts with the category word counts as a
		# subcategory; everything else is a member article.
		$parts = explode( ":" , $name , 2 ) ;
		if ( count( $parts ) == 2 && $parts[0] == $cat ) {
			$children[] = $sk->makeLink( $name , $parts[1] ) ;
		} else {
			$articles[] = $sk->makeLink( $name ) ;
		}
	}
	wfFreeResult( $res ) ;

	# Children
	if ( count( $children ) > 0 ) {
		asort( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode( ", " , $children ) ;
	}

	# Articles
	if ( count( $articles ) > 0 ) {
		asort( $articles ) ;
		$h = wfMsg( "category_header", $ti[1] );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode( ", " , $articles ) ;
	}

	return $r ;
}
247
# Returns the list of HTML attribute names allowed in wikitext
# (no scripting attributes). "width" appears twice because it is listed
# both as a general and as a table attribute.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
	$breaksAndRules = array( "clear", /* BR */ "noshade" /* HR */ );
	$quotes = array( "cite" /* BLOCKQUOTE, Q */ );
	$font = array( "size", "face", "color" /* FONT */ );
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" );
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	);
	$css = array( "id", "class", "name", "style" );

	return array_merge( $general, $breaksAndRules, $quotes, $font, $lists, $tables, $css );
}
264
# Cleans the attribute part of an HTML tag.
#
# Attributes whose name is not returned by getHTMLattrs() are removed. If
# what remains contains a style attribute with something that looks like a
# script expression or a URL, ALL attributes are dropped.
#
# $t  raw attribute text, e.g. 'border=1 onclick="..."'
#
# Returns the cleaned, trimmed attribute text (possibly "").
#
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier: /e evaluated the
	# user-supplied attribute value as PHP code inside a double-quoted
	# string, mangled single quotes, and no longer exists in PHP 7+.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name and $m[3], when present, its value.
# Returns the attribute unchanged if its name is allowed, otherwise "".
#
/* private */ function filterTagAttribute( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '' ;
	}
	if ( isset( $m[3] ) && $m[3] != "" ) {
		return $m[1] . "=" . $m[3] ;
	}
	return $m[1] ;
}
287
# Converts wiki table markup into HTML table tags, line by line:
#   {|    opens a table (the rest of the line is attribute text)
#   |}    closes it
#   |-    starts a new row (extra dashes allowed, then row attributes)
#   |+    caption
#   | / ! data / header cells; "||" (or "!!" on a header line) separates
#         several cells on one line, "attrs|content" gives a cell attributes
#
# Four parallel stacks, one entry per nesting level, track what is open.
# Anything still open at the end of the text is closed.
#
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a cell tag open?
	$ltd = array () ; # Which tag was it: TD, TH or CAPTION?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes waiting for the next <tr>
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		$fc2 = substr ( $x , 0 , 2 ) ;
		if ( "{|" == $fc2 )
		{
			# Attribute text starts right after the two marker characters;
			# skipping three cut the first letter off in "{|border=1".
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Not inside a table: leave the line alone
		else if ( "|}" == $fc2 )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == $fc2 ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is written when the first cell of the row shows up
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == $fc2 ) # Cells and caption
		{
			if ( "|+" == $fc2 )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row if this is its first cell
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;

				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $cell = "{$z}<{$l}>{$y[0]}" ;
				else $cell = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $cell ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open cell, tr && table.
	# The closing tag uses the recorded cell type, so an open TH or CAPTION
	# is no longer closed with </td>.
	while ( count ( $td ) > 0 )
	{
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
387
388 # Well, OK, it's actually about 14 passes. But since all the
389 # hard lifting is done inside PHP's regex code, it probably
390 # wouldn't speed things up much to add a real parser.
391 #
# Runs the conversion passes over text from which the protected sections
# have already been stripped. The order of the passes matters.
#
# $text       stripped wikitext
# $linestart  handed on to doBlockLevels()
#
# Returns the generated HTML, still containing the strip placeholders.
#
function doWikiPass2( $text, $linestart )
{
	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	# Clean up user-supplied HTML first, then expand magic variables
	$text = $this->replaceVariables( $this->removeHTMLtags( $text ) );

	# Normalise upper-case rules; "----" itself is handled as a token in
	# replaceInternalLinks()
	$text = str_replace ( "<HR>", "<hr>", $text );

	# Block structure
	$text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# External links have to be done before the internal ones
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	# Skin-specific post-processing, then the category listing
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
424
425
# Turns "== Title ==" style lines into <h1>..<h6> elements.
# Levels are tried from six equals signs down to one, so that the longest
# run of "=" wins.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
435
436 # Note: we have to do external links before the internal ones,
437 # and otherwise take great care in the order of things here, so
438 # that we don't end up interpreting some URLs twice.
439
# Converts external links for every supported protocol, one protocol at a
# time. The flag says whether bracketed links without a label get an
# automatic number; only http and https do.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
454
# Converts the external links of ONE protocol in $s.
#
# Bare URLs become links (or, when $autonumber is set and the options allow
# external images, <img> tags for image URLs). Bracketed links of the form
# [url] and [url label] are converted afterwards.
#
# $s           text to process
# $protocol    protocol name without the colon, e.g. "http"
# $autonumber  true to label [url] links with a running number instead of
#              the URL itself
#
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stands in for the protocol while bare URLs are replaced, so that the
	# second regex does not match URLs produced by the first one
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";
	$urlChars = $uc . $sep;

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$urlChars}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";
	$barePattern = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)" .
		"([^{$urlChars}]|[{$sep}]|$)/";

	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageTag = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageTag . "\\6", $s );
	}

	$target = "{$unique}:\\3";
	$attribs = $sk->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
	$anchor = "<a href=\"{$target}\"" . $attribs . ">" . wfEscapeHTML( $target ) . "</a>";
	$s = preg_replace( $barePattern, "\\1" . $anchor . "\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Now the bracketed form. Everything before the first "[protocol:" is
	# kept as it is (the leading blank only guards the split).
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = substr( array_shift( $pieces ), 1 );

	$plainBracket = "/^([{$urlChars}]+)](.*)\$/sD";
	$labelBracket = "/^([{$urlChars}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $plainBracket, $piece, $match ) ) {
			# [url]
			$link = "{$protocol}:{$match[1]}";
			$trail = $match[2];
			$label = $autonumber
				? ( "[" . ++$this->mAutonumber . "]" )
				: wfEscapeHTML( $link );
		} else if ( preg_match( $labelBracket, $piece, $match ) ) {
			# [url label]
			$link = "{$protocol}:{$match[1]}";
			$label = $match[2];
			$trail = $match[3];
		} else {
			# Not a well-formed link: put the text back untouched
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		# Printable mode spells the URL out after the link
		$paren = $this->mOptions->getPrintable()
			? ( " (<i>" . htmlspecialchars ( $link ) . "</i>)" )
			: "";
		$la = $sk->getExternalLinkAttributes( $link, $label );
		$s .= "<a href='{$link}'{$la}>{$label}</a>{$paren}{$trail}";
	}
	return $s;
}
517
# Handles a ''' token (bold toggle).
#
# $state  array with keys "em" and "strong"; each is FALSE or the token
#         position at which that style was opened. Updated in place.
# $token  the current token; its "pos" entry is recorded on opening.
#
# Returns the HTML to emit for the token.
#
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		# Not bold yet: open it and remember where
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# Closing bold. If italics were opened after the bold
	# (''' lala ''lala '''), they are closed and re-opened around it to
	# keep the tags properly nested.
	$emOpenedLater = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedLater ? "</em></strong><em>" : "</strong>";
}
535
# Handles a '' token (italics toggle).
#
# $state  array with keys "em" and "strong"; each is FALSE or the token
#         position at which that style was opened. Updated in place.
# $token  the current token; its "pos" entry is recorded on opening.
#
# Returns the HTML to emit for the token.
#
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		# Not in italics yet: open them and remember where
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# Closing italics. If bold was opened after the italics
	# (''lala'''lala'' ....'''), it is closed and re-opened around them to
	# keep the tags properly nested.
	$strongOpenedLater = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedLater ? "</strong></em><strong>" : "</em>";
}
553
# Handles a ''''' token, which toggles bold and italics together.
#
# $state  array with keys "em" and "strong"; each is FALSE or the token
#         position at which that style was opened. Updated in place.
# $token  the current token; its "pos" entry is recorded on opening.
#
# Returns the HTML to emit for the token.
#
/* private */ function handle5Quotes( &$state, $token )
{
	# $s is assigned, not appended to: appending to the still undefined
	# variable raised a notice on every call.
	if ( $state["em"] && $state["strong"] ) {
		# Both open: close the one that was opened last first
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
577
# Token-driven pass that handles [[internal links]], quote markup
# ('', ''', '''''), "----" rules and the RFC / ISBN magic words.
#
# While a [[ is open, output is collected on a stack instead of being
# written out; the matching ]] glues the collected pieces together and hands
# them to handleInternalLink(). Tokens of unknown type are offered to the
# language object first and highlighted if it does not handle them.
#
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer = Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s = "";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;

			case "[[":
				# link opening tag: start collecting
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt = "";
				break;

			case "]]":
				# link close tag
				if ( count( $tokenStack ) == 0 ) {
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					# Unwind the stack down to the opening [[, rebuilding
					# the link text in its original order
					$linkText = "";
					for ( $top = array_pop( $tokenStack );
						$top["type"] != "[[";
						$top = array_pop( $tokenStack ) )
					{
						if ( !empty( $top["text"] ) ) {
							$linkText = $top["text"] . $linkText;
						}
					}
					$txt = $linkText . "]]";
					# The [[ token may carry text of its own; it is passed
					# on as the link prefix
					$prefix = isset( $top["text"] ) ? $top["text"] : "";

					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" ) {
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );
				}
				$tagIsOpen = ( count( $tokenStack ) != 0 );
				break;

			case "----":
				$txt = "\n<hr>\n";
				break;

			# The next three handle quotes
			case "'''":
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;

			case "":
				# empty token
				$txt = "";
				break;

			# Magic words are left alone inside a link
			case "RFC ":
				$txt = $tagIsOpen ? "RFC " : $this->doMagicRFC( $tokenizer );
				break;
			case "ISBN ":
				$txt = $tagIsOpen ? "ISBN " : $this->doMagicISBN( $tokenizer );
				break;

			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}

		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while

	# Anything left on the stack belongs to a [[ that was never closed.
	# Write it back out as plain text, in its original order.
	$leftover = "";
	while ( $lastToken = array_pop( $tokenStack ) )
	{
		$piece = ( $lastToken["type"] == "text" ) ? $lastToken["text"] : $lastToken["type"];
		$leftover = $piece . $leftover;
	}
	$s .= $leftover;

	return $s;
}
710
# Converts one internal link to HTML.
#
# $line    everything after the opening [[, i.e. "target|label]]trail"
# $prefix  text that directly precedes the link
#
# Returns the HTML for the link, or the original markup if the link is not
# well-formed. Interlanguage and category links produce no link in the text;
# they are recorded for later use instead.
#
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	# Every return below is preceded by wfProfileOut() so that the
	# wfProfileIn() above is always balanced.

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			# Interlanguage link: recorded on the output object, not shown in the text
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# A link to the current page is shown in bold instead of as a link,
	# unless it points to a section. The comparison has to be strict: "#" at
	# position 0 is a match, not a miss.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
	  ( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	# NOTE(review): $category holds the message text returned by
	# wfMsg("category") while $ns is a namespace value - confirm that this
	# comparison selects what is intended.
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		# NOTE(review): language links above are stored on $this->mOutput,
		# category links on the parser itself - confirm which is intended.
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
836
837 # Some functions here used by doBlockLevels()
838 #
# Ends the section recorded in $this->mLastSection and forgets it.
# Returns the closing tag followed by a newline; for "p" or no section at
# all only the newline is returned, as paragraphs are never closed
# explicitly.
/* private */ function closeParagraph()
{
	$open = $this->mLastSection;
	$this->mLastSection = "";

	if ( 0 == strcmp( "p", $open ) || 0 == strcmp( "", $open ) ) {
		return "\n";
	}
	return "</" . $open . ">" . "\n";
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters in which they agree.
#
# $st1, $st2  the strings to compare (list prefixes such as "**#")
#
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square brackets instead of the curly-brace offset syntax, which PHP 8
	# rejects as a parse error
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
863 # These next three functions open, continue, and close the list
864 # element appropriate to the prefix character passed into them.
865 #
# Opens the list that belongs to the prefix character $char and its first
# item, after closing any open paragraph section. Opening a ";" list also
# marks a definition term as open. An unknown character yields only an
# error comment.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Replaces, rather than extends, what closeParagraph() returned
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
881
# Closes the current list item and opens the next one for the prefix
# character $char. For definition lists $this->mDTopen tells whether a term
# (dt) or a definition (dd) has to be closed, and is updated to reflect the
# element that is opened.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = $this->mDTopen ? "</dt>" : "</dd>";
	$opensTerm = ( ";" == $char );
	$this->mDTopen = $opensTerm;
	return $close . ( $opensTerm ? "<dt>" : "<dd>" );
}
898
# Closes the last item and the list that belong to the prefix character
# $char. Returns the closing tags followed by a newline, or an error comment
# (without newline) for any character other than "*", "#" and ":".
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			# A definition list ends with either an open term or an open
			# definition
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text . "\n";
}
914
# Goes through the text line by line and builds the block-level structure:
# lists from lines starting with * # : ; and p / pre sections from the
# remaining lines. Lines inside table, blockquote and heading elements are
# left alone.
#
# $text       text to process
# $linestart  when false, the first line is copied through unprocessed
#
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );

	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	# Defined up front: they are read after the loop, which does not run at
	# all when the only line has been shifted off below
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list as on the previous line: just the next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# The prefix changed: close what no longer applies, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
			  "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# substr() rather than an offset read, so that an empty
				# line does not access a non-existent character
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					# A blank line starts a new section
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
				  $newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
			  preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists still open after the last line
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1020
# Expands magic variables in $text: the date and time words, the article
# counter and the message words that take a parameter. Counters recording
# that magic words were found are increased along the way.
#
# Returns the text with the variables replaced.
#
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!
	$magic = array(
		MAG_CURRENTMONTH => date( "m" ),
		MAG_CURRENTMONTHNAME => $wgLang->getMonthName( date("n") ),
		MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
		MAG_CURRENTDAY => date("j"),
		MAG_CURRENTDAYNAME => $wgLang->getWeekdayName( date("w")+1 ),
		MAG_CURRENTYEAR => date( "Y" ),
		MAG_CURRENTTIME => $wgLang->time( wfTimestampNow(), false )
	);

	$this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	# The article count is only looked up if the word is actually used
	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$text = $mw->replace( wfNumberOfArticles(), $text );
		if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$withParameter = array(
		array( MAG_MSG, "wfReplaceMsgVar" ),
		array( MAG_MSGNW, "wfReplaceMsgnwVar" )
	);
	foreach ( $withParameter as $entry ) {
		$mw =& MagicWord::get( $entry[0] );
		$text = $mw->substituteCallback( $text, $entry[1] );
		# NOTE(review): this counter lives on the parser itself while the
		# "old magic" counter above lives on $this->mOutput - confirm
		# which object is meant.
		if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
	}

	wfProfileOut( $fname );
	return $text;
}
1065
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on "<". A fragment that parses as one of the allowed
# elements (and is acceptable given the tags currently open) is re-emitted
# with its attributes passed through fixTagAttributes(); every other fragment
# is emitted with its angle brackets escaped. Paired tags that are still open
# at the end of the text are closed.
#
# $text: the text to sanitize
# Returns the sanitized text.
/* private */ function removeHTMLtags( $text )
{
	# Profile under this class' name, like Parser::parse does
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	# Table cells and rows are not tracked on the tag stack either
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is kept
	# in case getHTMLattrs() has side effects -- confirm.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	# $tablestack saves the enclosing tag stack while inside a <table>
	$tagstack = array(); $tablestack = array();

	foreach ( $bits as $x ) {
		$badtag = 0 ;

		# A fragment that does not look like "name attrs>rest" is not a tag
		# at all. Test the match explicitly instead of silencing the notices
		# that unpacking an empty match array would raise.
		$istag = preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $x, $regs );
		if ( $istag ) {
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );
		}

		if ( $istag && in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				if ( ! in_array( $t, $htmlsingle ) &&
				  ( $ot = array_pop( $tagstack ) ) != $t ) {
					# Mismatch: put back what was popped. Nothing was
					# popped when the stack was empty, so push nothing.
					if ( !is_null( $ot ) ) {
						array_push( $tagstack, $ot );
					}
					$badtag = 1;
				} else {
					if ( $t == "table" ) {
						# Leaving the table: restore the enclosing stack
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  ! in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  ! in_array ( $t , $htmlnest ) ) {
					$badtag = 1 ;
				} else if ( ! in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Start a fresh stack for the table's contents
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);

			}
			if ( ! $badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		# Not an acceptable tag: show it literally
		$text .= "&lt;" . str_replace( ">", "&gt;", $x);
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
	}
	wfProfileOut( $fname );
	return $text;
}
1158
/*
 *
 * Rebuilds the <h1>..<h6> headings found in $text and returns the new text.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title is editable by the user and
 *    the option is enabled (and the page does not contain MAG_NOEDITSECTION)
 * 3) Add a table of contents after the first block of text if the option is
 *    enabled, the page does not contain MAG_NOTOC, the page is not the main
 *    page and there are more than three headlines
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	# $matches[1] = level digit, $matches[2] = rest of the opening tag
	# including ">", $matches[3] = headline contents
	preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();

	# State carried from one headline to the next; initialised explicitly
	# so that the first iteration does not read undefined variables.
	$level = 0;          # level of the current headline
	$prevlevel = 0;      # level of the previous headline, 0 before the first
	$h = array();        # per-level headline counters
	$refers = array();   # canonized headline => number of occurrences so far
	$refcount = array(); # headline index => occurrence number of its anchor
	$numbering = "";     # dotted number of the current headline
	$dot = 0;            # whether $numbering already has a component
	$toclines = 0;       # number of headlines, set once the TOC is built

	foreach ( $matches[3] as $headline ) {
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++;

		if ( $nh || $st ) {
			// Join the non-zero counters of all levels up to this one with dots
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace('"',"",$canonized_headline);
		$canonized_headline = str_replace(" ","_",trim($canonized_headline));
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text

		if ( $nh || $st ) {
			$tocline = $numbering ." ". $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) {
			// Second and later duplicates get a numeric suffix
			$anchor .= "_".$refcount[$c];
		}
		if ( $st ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink($c+1);
		}

		// Put it all together

		$head[$c] .= "<h".$level.$matches[2][$c]
			."<a name=\"".$anchor."\">"
			.$headline
			."</a>"
			."</h".$level.">";

		// Add the edit section link

		if ( $esr ) {
			$head[$c] = $sk->editSectionScript($c+1,$head[$c]);
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines

	$blocks = preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>".$full.$toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1326
# Handles the text following an "ISBN" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading spaces are set aside, then the longest run of digits,
# dashes and upper-case letters is taken as the ISBN. When that run contains
# at least one character besides dashes, it is turned into a link to the
# Special:Booksources page; otherwise the text is returned unchanged behind
# "ISBN ". If the next token is not a text token, only "ISBN " is returned
# and the token is left in the tokenizer.
#
# $tokenizer: object providing previewToken() and nextToken()
# Returns the replacement text (HTML or plain text).
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# strspn() measures the leading run without indexing past the end of
	# the string, which the character-by-character loops used to do.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$isbnLen = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $isbnLen );
	$x = (string)substr( $x, $isbnLen );

	$num = str_replace( array( "-", " " ), "", $isbn );

	if ( "" == $num ) {
		# Nothing usable as a number. Keep any dashes that were consumed
		# so that no input text is lost.
		$text = "ISBN $blank$isbn$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
# Handles the text following an "RFC" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading spaces are set aside and the following run of digits is
# taken as the RFC number. With a number, an external link is built from the
# "rfcurl" message ("$1" replaced by the number); without one, the text is
# returned unchanged behind "RFC ". If the next token is not a text token,
# only "RFC " is returned and the token is left in the tokenizer.
#
# $tokenizer: object providing previewToken() and nextToken()
# Returns the replacement text (HTML or plain text).
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789";

	# strspn() measures the leading run without indexing past the end of
	# the string, which the character-by-character loops used to do.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$rfcLen = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $rfcLen );
	$x = (string)substr( $x, $rfcLen );

	if ( "" == $rfc ) {
		# Plain assignment: there is no earlier $text to append to
		$text = "RFC $blank$x";
	} else {
		$url = wfMsg( "rfcurl" );
		$url = str_replace( "$1", $rfc, $url);
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
1415
# Transforms wikitext before it is saved.
#
# Stores $options and $title on the parser, optionally resets the parser
# state, converts CRLF line ends to LF, and runs pstPass2() on the text with
# the sections handled by strip() taken out (strip() is called with rendering
# disabled) and put back afterwards by unstrip().
#
# $text:       wikitext as submitted
# $title:      title of the page being saved
# $user:       user performing the save, handed on to pstPass2()
# $options:    parser options to use
# $clearState: whether to call clearState() first
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		$this->clearState();
	}

	# Local strip state, filled by strip() and consumed by unstrip()
	$saved = false;
	$normalized = str_replace( "\r\n", "\n", $text );
	$stripped = $this->strip( $normalized, $saved, false );
	$transformed = $this->pstPass2( $stripped, $user );
	return $this->unstrip( $transformed, $saved );
}
1431
# Second pass of the pre-save transform. Performs, in this order:
#  - signature expansion: "~~~~" becomes a link to the user's page followed
#    by a timestamp, "~~~" becomes the link alone;
#  - context ("pipe trick") links: [[|name]], [[name (context)|]],
#    [[namespace:page|]] and [[ns:page (context)|]] get their missing half
#    filled in;
#  - {{SUBST:xxx}} expansion through the wfReplaceSubstVar callback;
#  - trimming of trailing whitespace and removal of MAG_END.
#
# $text: wikitext to transform
# $user: user object; getName() and the "nickname" option are read
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The tilde markers are literal strings, so use str_replace(): with
	# preg_replace() a "$1" or "\1" inside the user name or nickname would be
	# interpreted as a backreference in the replacement.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# Most specific pattern first
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1495
1496
1497 }
1498
# Result of a parse: the rendered text plus the data collected on the way.
class ParserOutput
{
	var $mText;             # The output text
	var $mLanguageLinks;    # Language links, as passed in or set
	var $mCategoryLinks;    # Category links, as passed in or set
	var $mContainsOldMagic; # Old-magic marker/counter, see containsOldMagic()

	# Constructor; every argument is optional and stored as given.
	function ParserOutput(
		$text = "",
		$languageLinks = array(),
		$categoryLinks = array(),
		$containsOldMagic = false
	) {
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors
	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators; each one hands the member and the new value to wfSetVar()
	# and returns whatever wfSetVar() returns.
	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1521
# Set of options controlling a parse. Holds copies of the relevant site
# settings and user preferences so that the parser itself does not have to
# consult them.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors
	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Mutators; each returns the result of wfSetVar()/wfSetRef()
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a new ParserOptions object initialised from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() declares its parameter by reference itself, so
		# no "&" is needed (or, on newer PHP versions, permitted) at the call.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills every option from the site-wide globals and from the preferences
	# of $userInput. A false-ish $userInput is replaced by a new User object.
	# mPrintable is always reset to false.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
1597
# Regex callbacks, used in Parser::replaceVariables
1599
# Callback for MAG_MSG: returns the text of the message named in $matches[1]
# with the dangerous stuff removed and its internal links replaced.
# The sanitizing is necessary because replaceVariables is called after
# removeHTMLtags, and message text can come from any user.
# The link cache is suspended while the links inside the message are
# replaced; afterwards the message's own page in the MediaWiki namespace is
# added to the cache.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$msgKey = $matches[1];
	$cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

	$wgLinkCache->suspend();
	$linked = $wgCurOut->replaceInternalLinks( $cleaned );
	$wgLinkCache->resume();

	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgKey ) );
	return $linked;
}
1612
# Callback for MAG_MSGNW: returns the text of the message named in
# $matches[1], passed through wfEscapeWikiText().
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
# The message's page in the MediaWiki namespace is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
	# Only the link cache is needed here; the parser object is not used
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
1621
1622
1623
1624 ?>