Moved ISBN magic to new parser
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor (PHP 4 style: a method named after the class).
# Puts the new parser into its cleared initial state.
function Parser()
{
	$this->clearState();
}
32
# Reset every member listed under "Cleared with clearState()":
# a fresh ParserOutput, the external-link autonumber counter back
# at zero, no open block section, no open <dt>, and no strip state.
function clearState()
{
	$this->mStripState  = false;
	$this->mDTopen      = false;
	$this->mLastSection = "";
	$this->mAutonumber  = 0;
	$this->mOutput      = new ParserOutput();
}
41
# First pass--strip <nowiki>, <pre> and <math> sections, pass the rest
# off to doWikiPass2() which does all the real work, then put the
# stripped sections back.
#
# Returns a ParserOutput
#
# Parameters:
#   $text        wikitext to convert
#   $title       title object of the page being parsed; kept by
#                reference in $this->mTitle for the other passes
#   $options     parser options object; kept in $this->mOptions
#   $linestart   handed through to doWikiPass2()
#   $clearState  when true (the default) clearState() is called first
#
# The return value is $this->mOutput with its text set to the result.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# strip() fills $this->mStripState by reference; unstrip() needs
	# the very same state to put the stripped sections back.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Returns a random string of lowercase hex digits, built from two
# independent mt_rand() draws in the range 0..0x7fffffff.
# strip() uses these strings as placeholder prefixes.
/* static */ function getRandomString()
{
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
# Cuts <nowiki>, <math> (only when the TeX option is on) and <pre>
# sections out of $text and leaves a unique placeholder in their place.
#
# Parameters:
#   $text    wikitext to process
#   $state   (out) overwritten with the placeholder prefixes, section
#            counters and section contents that unstrip() needs
#   $render  true:  store the output form of each section
#            false: store the section verbatim, tags included
#
# Returns the text with every stripped section replaced by
# "<prefix><section number as 8 hex digits>".
#
# A section whose closing tag is missing runs to the end of the text.
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	$stripped = "";
	$stripped2 = "";
	$stripped3 = "";

	# Replace any instances of the placeholders that already occur in
	# the input, so they cannot be mistaken for real ones later.
	$text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
	$text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
	$text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

	# Pass 1: <nowiki>
	while ( "" != $text ) {
		$p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
			++$state['nwsecs'];

			if ( $render ) {
				$state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
			} else {
				$state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
			}

			$stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
			# No closing tag => preg_split() returned one piece only
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}

	# Pass 2: <math>, only when TeX rendering is enabled
	if( $this->mOptions->getUseTeX() ) {
		while ( "" != $stripped ) {
			$p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
			$stripped2 .= $p[0];
			if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
				$stripped = "";
			} else {
				$q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
				++$state['mathsecs'];

				if ( $render ) {
					$state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
				} else {
					$state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
				}

				$stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
				$stripped = isset( $q[1] ) ? $q[1] : "";
			}
		}
	} else {
		$stripped2 = $stripped;
	}

	# Pass 3: <pre>
	while ( "" != $stripped2 ) {
		$p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
		$stripped3 .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$stripped2 = "";
		} else {
			$q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
			++$state['presecs'];

			if ( $render ) {
				$state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
			} else {
				$state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
			}

			$stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
			$stripped2 = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped3;
}
165
# Puts the sections removed by strip() back into $text.
#
# $state is the array filled in by strip(). Placeholders are
# substituted kind by kind: first <pre>, then <math>, then <nowiki>.
# Returns the text with the placeholders replaced.
function unstrip( $text, &$state )
{
	$kinds = array( 'pre', 'math', 'nw' );
	foreach ( $kinds as $kind ) {
		for ( $n = 1; $n <= $state[$kind . 'secs']; ++$n ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
181
# Builds the HTML listing appended to a category page: the
# subcategories and the articles that link to it.
#
# Returns "" when the category option is off or when the current
# title does not start with the localised "category" word followed
# by a colon-separated name; otherwise returns the HTML block.
#
# NOTE(review): this uses $wgUser for the skin although the file
# header asks to keep $wgUser out of the parser -- confirm whether
# $this->mOptions->getSkin() should be used instead.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	# Always hand back a string; the caller concatenates the result
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# "clear" is the BR attribute that ends floating; "break" is not an attribute
	$r = "<br clear=all>\n" ;

	$articles = array() ;
	$parents = array () ;
	$children = array() ;


	# $sk =& $this->mGetSkin();
	$sk =& $wgUser->getSkin() ;

	# Hard-wired switch: with false, the linking pages are read from brokenlinks
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		if ( $doesexist ) {
			$t = $x->l_from ;
		} else {
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;
		}

		# Pages whose own title starts with the category word are subcategories
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		# A title consisting of the category word alone has no name part
		$h = wfMsg( "category_header", isset ( $ti[1] ) ? $ti[1] : "" );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}


	return $r ;
}
251
# Returns the list of HTML attribute names that are allowed to
# survive in user-supplied tags (no scripting attributes).
# "width" appears twice, exactly as in the original list.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
	$breaksAndRules = array( "clear", /* BR */ "noshade" /* HR */ );
	$quotesAndFonts = array( "cite", /* BLOCKQUOTE, Q */ "size", "face", "color" /* FONT */ );
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" );
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	);
	$css = array( "id", "class", "name", "style" );

	return array_merge( $general, $breaksAndRules, $quotesAndFonts, $lists, $tables, $css );
}
268
# Sanitises the attribute part of an HTML tag.
#
# $t is the raw attribute text (everything between the tag name and
# the closing ">"). Attributes whose name is not in getHTMLattrs()
# are removed. If what remains contains a style attribute with
# "expression", a "tp://"-style URL or "url(", ALL attributes are
# dropped. Returns the trimmed, cleaned attribute text.
#
# The filtering used to rely on the /e (eval) regex modifier, which
# interpolated user-supplied attribute values into evaluated PHP
# code. A callback does the same job without evaluating anything.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): decides the fate of one
# name[=value] match. $m[1] is the attribute name, $m[3] (when
# present) its value. Returns "name" or "name=value" for an
# approved attribute and "" for anything else.
/* private */ function fixTagAttributesCallback ( $m )
{
	if ( !in_array ( strtolower ( $m[1] ), $this->getHTMLattrs() ) ) {
		return '' ;
	}
	# Group 3 is missing from $m when the attribute has no value
	if ( isset ( $m[3] ) && strlen ( $m[3] ) > 0 ) {
		return $m[1] . "=" . $m[3] ;
	}
	return $m[1] ;
}
291
# Converts wiki table markup into HTML table tags, line by line:
#   {| attrs   opens a table
#   |}         closes the innermost table
#   |-  attrs  starts a new row (any number of dashes)
#   |+         caption
#   | and !    data cells / header cells, "||" (or "!!") separating
#              several cells on one line, "attrs|text" giving
#              attributes to a single cell
# Lines outside any table are left untouched. Tables still open at
# the end of the text are closed. Returns the converted text.
#
# Four parallel stacks hold one entry per currently open table.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after the two-character marker is the attribute
			# list; fixTagAttributes() trims the result itself.
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only written when the first cell arrives;
			# remember its attributes until then.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Captions live outside rows; everything else needs an open <tr>
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag it was opened with (TD, TH or CAPTION)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
391
392 # Well, OK, it's actually about 14 passes. But since all the
393 # hard lifting is done inside PHP's regex code, it probably
394 # wouldn't speed things up much to add a real parser.
395 #
# Runs the individual conversion passes over $text in a fixed order
# and returns the resulting HTML. $linestart is handed on to
# doBlockLevels(). The order of the passes matters.
function doWikiPass2( $text, $linestart )
{
	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	# Clean up user HTML, then expand magic variables
	$text = $this->replaceVariables( $this->removeHTMLtags( $text ) );

	$text = str_replace ( "<HR>", "<hr>", $text );

	# Headings first, then paragraphs / lists / preformatted blocks
	$text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# External links go before internal ones (see replaceExternalLinks)
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
430
431
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are tried from six "=" signs down to one, so the longest
# marker wins. Returns the converted text.
/* private */ function doHeadings( $text )
{
	$i = 6;
	while ( $i >= 1 ) {
		$h = str_repeat( "=", $i );
		$pattern = "/^{$h}([^=]+){$h}(\\s|$)/m";
		$replacement = "<h{$i}>\\1</h{$i}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$i;
	}
	return $text;
}
441
442 # Note: we have to do external links before the internal ones,
443 # and otherwise take great care in the order of things here, so
444 # that we don't end up interpreting some URLs twice.
445
# Converts external links for every supported protocol, one protocol
# at a time. The flag given per protocol is passed on as the
# $autonumber argument of subReplaceExternalLinks().
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber flag, processed in this order
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
460
# Converts the external links of ONE protocol in $s.
#
# Parameters:
#   $s           text to convert
#   $protocol    protocol name without the colon, e.g. "http"
#   $autonumber  true: bracketed links without a label are shown as
#                "[n]" using the running counter $this->mAutonumber,
#                and image URLs may be turned into images when the
#                options allow external images;
#                false: unlabelled bracketed links show the URL itself
#
# Step 1 handles free (unbracketed) URLs, step 2 handles
# "[url]" and "[url label]". Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol name inside the links generated in
	# step 1; it is swapped back to the real protocol afterwards.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# Separators that are dropped when they are the last character of
	# a URL but kept when they occur inside it: in
	# "go to www.foo.com, where .." the comma is not part of the URL,
	# in "www.foo.com/123,2342,32.htm" the commas are.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: the curly braces { } are not part of the regexes,
	# they only tell PHP to insert the variable's content there.
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$freeLinkPattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	# --- Step 1: free URLs -------------------------------------------
	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}
	$linkAttribs = $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) );
	$linkLabel = wfEscapeHTML( "{$unique}:\\3" );
	$s = preg_replace( $freeLinkPattern, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$linkAttribs . ">" . $linkLabel .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# --- Step 2: bracketed links -------------------------------------
	# Split at every "[protocol:"; the leading blank guarantees a
	# first piece and is cut off again right away.
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = substr( array_shift( $pieces ), 1 );

	$bareLink = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelledLink = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $bareLink, $piece, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $labelledLink, $piece, $m ) ) {
			# [url label]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed link: put the text back unchanged
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		# Printable output spells the URL out after the link
		$paren = $this->mOptions->getPrintable()
			? " (<i>" . htmlspecialchars ( $link ) . "</i>)"
			: "";
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
523
# Handles a ''' (bold) token.
#
# $state["strong"] / $state["em"] hold the position of the token that
# opened bold / italics, or FALSE while closed; $state is updated in
# place. Returns the HTML to emit for this token.
#
# NOTE(review): a token position of 0 would read as "closed" --
# confirm the tokenizer never reports position 0.
/* private */ function handle3Quotes( &$state, $token )
{
	# Bold is not open yet: open it and remember where
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# Bold is open. Italics opened after it ( ''' lala ''lala ''' )
	# have to be closed first and reopened behind the </strong>.
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
541
# Handles a '' (italics) token; the mirror image of handle3Quotes().
#
# $state["em"] / $state["strong"] hold the position of the token that
# opened italics / bold, or FALSE while closed; $state is updated in
# place. Returns the HTML to emit for this token.
#
# NOTE(review): a token position of 0 would read as "closed" --
# confirm the tokenizer never reports position 0.
/* private */ function handle2Quotes( &$state, $token )
{
	# Italics are not open yet: open them and remember where
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# Italics are open. Bold opened after them ( ''lala'''lala'' ....''' )
	# has to be closed first and reopened behind the </em>.
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
559
# Handles a ''''' token, which toggles bold and italics together.
#
# $state["em"] / $state["strong"] hold the position of the token that
# opened italics / bold, or FALSE while closed; $state is updated in
# place. Returns the HTML to emit for this token:
#   both open     -> close both, innermost (later opened) first
#   only em open  -> close em, open strong
#   only strong   -> close strong, open em
#   neither open  -> open both
/* private */ function handle5Quotes( &$state, $token )
{
	# $s is assigned (not appended to) in every branch; there is
	# nothing to append to at this point.
	if ( $state["em"] && $state["strong"] ) {
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
583
# Token-driven pass that converts [[internal links]], quote markup
# ('' ''' ''''') , "----" rules and "ISBN " magic in $str.
#
# The text is read token by token from a Tokenizer. While a [[ is
# open, the output of every token is pushed onto $tokenStack instead
# of being appended to the result; the matching ]] pops it again,
# glues it together and hands it to handleInternalLink().
# Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	# Position of the token that opened <em> / <strong>, FALSE while closed
	$state["em"] = FALSE;
	$state["strong"] = FALSE;
	# TRUE while at least one [[ is waiting on the stack for its ]]
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[":
				# link opening tag.
				# Opening tags that never get closed are flushed back into
				# the output after the loop.
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link
				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					# Everything above the nearest [[ is the link's interior
					while ( $lastToken["type"] != "[[" )
					{
						$linkText = $lastToken["text"] . $linkText;
						$lastToken = array_pop( $tokenStack );
					}
					$txt = $linkText ."]]";
					# The text carried by the [[ token itself is passed on
					# as the link prefix
					$prefix = $lastToken["text"];
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						# It is appended so that handleInternalLink() sees the
						# text following the ]] as the link trail.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );
				}
				# Still inside an outer [[ if anything is left on the stack
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "ISBN ":
				$txt = $this->doMagicISBN( $tokenizer );
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		# Rebuild the text in original order: text tokens contribute
		# their text, any other token its type string (e.g. "[[").
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
699
# Renders one internal link.
#
# Parameters:
#   $line    the link WITHOUT its opening brackets but WITH the
#            closing ones and whatever text follows them:
#            "target|label]]trail"
#   $prefix  text that stood directly in front of the opening [[
#
# Returns the HTML for prefix + link + trail. If $line does not have
# a valid link form, or the target is not a valid title, the original
# text ($prefix . "[[" . $line) is returned unchanged.
#
# Valid link forms:
#   Foobar    -- normal
#   :Foobar   -- override special treatment of prefix (images, language links)
#   /Foobar   -- convert to CurrentPage/Foobar
#   /Foobar/  -- convert to CurrentPage/Foobar, strip the initial / from text
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	# NOTE(review): the file header says the interwiki-magic setting
	# belongs in ParserOptions; it is read as a global here because no
	# accessor for it is visible in this file -- confirm and switch.
	global $wgNamespacesWithSubpages, $wgLanguageCode, $wgInterwikiMagic;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );

	if ( !preg_match( $e1, $line, $m ) ) {
		# Invalid form; output directly
		wfProfileOut( $fname );
		return $prefix . "[[" . $line;
	}
	# page with normal text or alt
	$text = $m[2];
	$trail = $m[3];

	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		$currentNs = $this->mTitle->getNamespace();
		if( !empty( $wgNamespacesWithSubpages[$currentNs] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		# leading ":" only forces a plain link; it is not part of the target
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		wfProfileOut( $fname );
		return $prefix . "[[" . $line;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();

	# Exactly one of the following branches produces the output. The
	# conditions are tested in the order given, which is significant.
	if( $noforce && $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
		# Language link: recorded in the output object, not shown inline
		array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
		$s = $prefix . $trail;
	} elseif( $noforce && $ns == $image ) {
		$s = $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
	} elseif( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
		# Link to the page itself without an anchor: just emphasise it.
		# (=== so that a "#" at offset 0 counts as an anchor.)
		$s = $prefix . "<strong>" . $text . "</strong>" . $trail;
	} elseif ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		# NOTE(review): $category is the localised "category" message,
		# $ns comes from getNamespace(); this loose comparison of the
		# two looks suspicious -- confirm the intended test.
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mCategoryLinks[] = $t ;
		$s = $prefix . $trail ;
	} elseif( $ns == $media ) {
		$s = $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
	} elseif( $ns == $special ) {
		$s = $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
	} else {
		$s = $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}

	wfProfileOut( $fname );
	return $s;
}
824
825 # Some functions here used by doBlockLevels()
826 #
# Ends the block section recorded in $this->mLastSection and resets
# that member to "". Returns the closing tag followed by a newline;
# for "p" and for no section at all only the newline is returned.
/* private */ function closeParagraph()
{
	$section = $this->mLastSection;
	$this->mLastSection = "";

	if ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) ) {
		return "\n";
	}
	return "</" . $section . ">" . "\n";
}
837 # getCommon() returns the length of the longest common substring
838 # of both arguments, starting at the beginning of both.
839 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the
# number of leading characters (bytes) the two strings share.
# Returns 0 if either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the curly-brace form is gone in PHP 8
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
851 # These next three functions open, continue, and close the list
852 # element appropriate to the prefix character passed into them.
853 #
# Opens the list element that belongs to the prefix character $char
# ("*", "#", ":" or ";"). Any open paragraph section is closed first.
# For ";" the open-<dt> flag $this->mDTopen is set.
# Returns the HTML to emit; for an unknown character only an error
# comment is returned.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
869
# Moves on to the next item of the list identified by $char.
# For "*" and "#" this is simply "</li><li>". For ":" and ";" the
# currently open <dt> or <dd> (tracked in $this->mDTopen) is closed
# and a <dd> (":") or <dt> (";") is opened; mDTopen is updated.
# Any other character yields an error comment.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = $this->mDTopen ? "</dt>" : "</dd>";
	$startsTerm = ( ";" == $char );
	$this->mDTopen = $startsTerm;
	return $close . ( $startsTerm ? "<dt>" : "<dd>" );
}
886
# Closes the list element that belongs to $char ("*", "#" or ":")
# and returns the closing tags followed by a newline. For ":" the
# open-<dt> flag $this->mDTopen decides between </dt> and </dd> and
# is cleared. Any other character yields an error comment (without
# a newline).
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			return "</li></ul>" . "\n";
		case "#":
			return "</li></ol>" . "\n";
		case ":":
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				return "</dt></dl>" . "\n";
			}
			return "</dd></dl>" . "\n";
	}
	return "<!-- ERR 3 -->";
}
902
# Line-by-line pass that produces the block-level structure:
# lists from lines starting with * # : ; and <p> / <pre> sections
# for everything else.
#
# Parameters:
#   $text       text to process
#   $linestart  false: the first line is copied to the output as it
#               is and processing starts with the second line
#
# Uses and updates $this->mLastSection and $this->mDTopen.
# Returns the converted text.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;

	# Defined up front so the list-closing loop behind the foreach is
	# well defined even when the foreach has no line to work on.
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list nesting as the previous line: next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Nesting changed: close what is no longer part of the
			# prefix, then open what is new.
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
				"/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# A leading blank marks preformatted text. substr() is
				# used so that an empty line is handled without touching
				# a non-existent offset.
				if ( " " === substr( $t, 0, 1 ) ) {
					$newSection = "pre";
					# $t = wfEscapeHTML( $t );
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					# Blank line: always starts a new section
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
					$newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
				preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists still open after the last line
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1008
# Expands magic words in $text and returns the result:
# the date/time variables, the number-of-articles variable and the
# parameterised MSG / MSGNW variables. The counters
# $this->mContainsOldMagic and $this->mContainsNewMagic are
# increased as replacements are made.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!
	$magic = array();
	$magic[MAG_CURRENTMONTH] = date( "m" );
	$magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
	$magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
	$magic[MAG_CURRENTDAY] = date("j");
	$magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
	$magic[MAG_CURRENTYEAR] = date( "Y" );
	$magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );

	$this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	# The article count is only fetched when the word actually occurs
	$articleCountWord =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $articleCountWord->match( $text ) ) {
		$v = wfNumberOfArticles();
		$text = $articleCountWord->replace( $v, $text );
		if( $articleCountWord->getWasModified() ) { $this->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$parameterised = array(
		array( MAG_MSG, "wfReplaceMsgVar" ),
		array( MAG_MSGNW, "wfReplaceMsgnwVar" )
	);
	foreach ( $parameterised as $entry ) {
		$word =& MagicWord::get( $entry[0] );
		$text = $word->substituteCallback( $text, $entry[1] );
		if( $word->getWasModified() ) { $this->mContainsNewMagic++; }
	}

	wfProfileOut( $fname );
	return $text;
}
1053
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on "<". Each piece that starts with a whitelisted tag is
# re-emitted with its attributes passed through fixTagAttributes(); every
# other piece has its "<" and ">" escaped so it shows up as literal text.
# Tags that must be closed are tracked on a stack: a close tag that does not
# match the innermost open tag is escaped, and whatever is still open at the
# end of the text is closed automatically. Opening a <table> starts a fresh
# stack (the outer one is parked on $tablestack) and closing it restores the
# outer stack.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	$tagstack = array();
	$tablestack = array();

	foreach ( $bits as $x ) {
		# Pieces that do not even look like a tag are escaped right away
		if ( !preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $x, $regs ) ) {
			$text .= "&lt;" . str_replace( ">", "&gt;", $x );
			continue;
		}
		$slash = $regs[1];
		$t = strtolower( $regs[2] );
		$params = $regs[3];
		$brace = $regs[4];
		$rest = $regs[5];

		$badtag = 0;
		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				if ( ! in_array( $t, $htmlsingle ) ) {
					$ot = array_pop( $tagstack );
					if ( $ot != $t ) {
						# Not the innermost open tag: put it back, unless the
						# stack was empty and there is nothing to put back
						if ( !is_null( $ot ) ) {
							array_push( $tagstack, $ot );
						}
						$badtag = 1;
					}
				}
				if ( ! $badtag ) {
					if ( $t == "table" ) {
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  ! in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  ! in_array( $t, $htmlnest ) ) {
					$badtag = 1;
				} else if ( ! in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes( $params );
			}
			if ( ! $badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		$text .= "&lt;" . str_replace( ">", "&gt;", $x );
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) {
			$tagstack = array_pop( $tablestack );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1146
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Takes the HTML of the page in $text and returns it with the headlines
 * rewritten and, where applicable, the table of contents inserted after the
 * first block of text.
 *
 * */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$c = 0;

	# Working state, set up front so that nothing below reads or appends to
	# an undefined variable
	$level = 0;          # level of the current headline
	$prevlevel = 0;      # level of the previous headline, 0 before the first
	$toclevel = 0;       # current indentation depth of the TOC
	$toc = "";           # TOC markup collected so far
	$toclines = 0;       # number of TOC entries
	$numbering = "";     # dotted section number of the current headline
	$dot = 0;            # whether $numbering needs a "." before the next part
	$h = array();        # headline count per level
	$refers = array();   # occurrences per canonized headline (anchor dupes)
	$refcount = array(); # per headline: which occurrence of its anchor it is
	$head = array();     # rebuilt headline markup, by headline index
	$full = "";          # reassembled page

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	foreach ( $matches[3] as $headline ) {
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++; // count number of headlines for each level

		if ( $nh || $st ) {
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );

		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text

		if ( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section

		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) {
			$anchor .= "_" . $refcount[$c];
		}
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c + 1 );
		}

		// Put it all together

		$head[$c] .= "<h" . $level . $matches[2][$c]
			. "<a name=\"" . $anchor . "\">"
			. $headline
			. "</a>"
			. "</h" . $level . ">";

		// Add the edit section link

		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		# There is one more block than there are headlines, so the last
		# block has no headline to follow it
		if ( isset( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1309
# Handles the text that follows an "ISBN" magic word.
#
# Empty-typed tokens are skipped. If the next token is a text token it is
# consumed, and a leading run of ISBN characters (digits, "-", A-Z), after
# optional blanks, is turned into a link to Special:Booksources. The rest of
# the token text is appended unchanged.
#
# Returns the replacement text: the link plus the remaining text, or a plain
# "ISBN ..." string when no ISBN characters follow, or just "ISBN " when the
# next token is not a text token.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	# NOTE(review): the is_array() guard assumes the tokenizer hands back a
	# non-array value once it runs out of tokens; without the guard such a
	# value compares equal to "" and this loop never ends. Confirm against
	# Tokenizer.php.
	while ( is_array( $token ) && $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( !is_array( $token ) || $token["type"] != "text" )
	{
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Measure the leading blanks and the run of ISBN characters after them.
	# strspn() stops cleanly at the end of the string, so an ISBN that ends
	# the token needs no special handling.
	$blankLen = strspn( $x, " " );
	$isbnLen = strspn( $x, $valid, $blankLen );

	$blank = str_repeat( " ", $blankLen );
	$isbn = (string)substr( $x, $blankLen, $isbnLen );
	$rest = (string)substr( $x, $blankLen + $isbnLen );

	# The number passed to Booksources carries no hyphens
	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		return "ISBN $blank$rest";
	}

	$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
	$text = "<a href=\"" .
		$titleObj->getUrl( "isbn={$num}", false, true ) .
		"\" class=\"internal\">ISBN $isbn</a>";
	$text .= $rest;
	return $text;
}
1355
# Placeholder for RFC handling: currently hands $text back untouched.
/* private */ function magicRFC( $text )
{
	return $text;
}
1360
# Transforms wikitext before it is saved.
#
# Runs strip() with rendering turned off, applies the save-time expansions
# of pstPass2() (signatures, context links, {{SUBST:...}}), then puts the
# stripped sections back with unstrip().
#
#   $text        wikitext to transform
#   $title       title of the page, kept in $this->mTitle for pstPass2()
#   $user        user whose name and nickname are used for signatures
#   $options     parser options, kept in $this->mOptions
#   $clearState  when true (the default), reset the parser state first
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		# This has to be a call; a bare "$this->clearState" only reads a
		# property and resets nothing.
		$this->clearState();
	}

	# A local strip state is used here rather than $this->mStripState
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
1375
# Second pass of the pre-save transform. Applies, in order:
#  - signatures: ~~~~ becomes a link to the user page plus a timestamp,
#    ~~~ becomes the link alone;
#  - context links: [[|name]], [[name (context)|]], [[ns:name|]] and
#    [[ns:name (context)|]] get their link text filled in;
#  - {{SUBST:xxx}} variables, through the wfReplaceSubstVar callback;
#  - trimming of trailing whitespace, followed by removal of the MAG_END tag.
#
#   $text  wikitext to transform
#   $user  user whose name and "nickname" option are used for signatures
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( "TZ" ); putenv( "TZ=$wgLocaltimezone" );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

	# Plain string replacement on purpose: the name and nickname are user
	# data, and as a preg_replace() replacement any "$1" or "\1" inside them
	# would be read as a backreference. ~~~~ has to be handled before ~~~.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
		# [[ns:page (cont)|]]

	# The context is the parenthesised part of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1439
1440
1441 }
1442
# Result of a parse: the rendered text together with the language links,
# the category links and the old-magic flag that go with it.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	# Every argument is optional; the defaults describe an empty result.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors
	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators; each one hands back whatever wfSetVar() returns
	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1465
# Settings that steer a parse. Part of them comes from the site-wide
# globals, the rest from the preferences of a user; see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Builds a fresh ParserOptions and fills it in from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# No "&" at the call site: initialiseFromUser() already declares its
		# parameter by reference, and call-time pass-by-reference is rejected
		# by later PHP versions.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Copies the site-wide settings from the globals and the per-user ones
	# from $userInput. When $userInput is empty, a new User object supplies
	# the per-user values.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
1541
# Regex callbacks, used in Parser::replaceVariables
1543
# Callback for {{MSG:xxx}}: returns the text of message xxx.
# Only the dangerous stuff is removed. That is necessary because
# replaceVariables is called after removeHTMLtags, and message text can
# come from any user.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$msgKey = $matches[1];
	$html = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

	# The link cache is suspended while the links inside the message are
	# replaced, and resumed right after
	$wgLinkCache->suspend();
	$html = $wgCurOut->replaceInternalLinks( $html );
	$wgLinkCache->resume();

	# The message page itself is then added to the link cache
	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $html;
}
1556
# Callback for {{MSGNW:xxx}}: returns the text of message xxx with its wiki
# markup escaped. Effective <nowiki></nowiki>.
# Not real <nowiki> because this is called after nowiki sections are processed
function wfReplaceMsgnwVar( $matches ) {
	global $wgLinkCache;

	$msgKey = $matches[1];
	$escaped = wfEscapeWikiText( wfMsg( $msgKey ) );

	# The message page is added to the link cache
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgKey ) );

	return $escaped;
}
1565
1566
1567
1568 ?>