3 include_once('Tokenizer.php');
6 # objects: $wgUser, $wgTitle, $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut, $wgArticle
10 # settings: $wgUseTex, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic,
11 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgUseLinkPrefixCombination
15 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
24 $this->mOutput
= new ParserOutput
;
25 $this->mAutonumber
= 0;
26 $this->mLastSection
= "";
27 $this->mDTopen
= false;
30 # First pass--just handle <nowiki> sections, pass the rest off
31 # to doWikiPass2() which does all the real work.
33 # Returns a ParserOutput
# Top-level entry point of the parser. From the lines visible here it:
#  - escapes any literal occurrence of the three placeholder strings in the input,
#  - cuts out nowiki, math and pre sections with preg_split(), keeps their
#    rendered form in $nwlist / $mathlist / $prelist and leaves a marker of the
#    form {placeholder}{index}s in the text,
#  - runs the remaining text through doWikiPass2(),
#  - substitutes the stored sections back in place of their markers,
#  - stores the result via $this->mOutput->setText() and returns $this->mOutput.
# $linestart is forwarded unchanged to doWikiPass2(); $clearState is not
# referenced in the lines visible here.
# NOTE(review): parts of this function are not visible in this view (the
# section counters, the else branches of the split loops) -- confirm against
# the full file before changing the loop structure.
35 function parse( $text, $linestart = true, $clearState = true )
38 $fname = "Parser::parse";
39 wfProfileIn( $fname );
# Random-looking marker strings used as placeholders for stripped sections.
40 $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
41 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
42 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
57 # Replace any instances of the placeholders
58 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
59 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
60 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
# Pass 1: nowiki sections. The text is consumed piece by piece; the loop ends
# once $text has been emptied.
62 while ( "" != $text ) {
63 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
65 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $text = ""; }
67 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
69 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
70 $stripped .= $unique . $nwsecs . "s";
# Pass 2: math sections, rendered through renderMath().
76 while ( "" != $stripped ) {
77 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
79 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped = ""; }
81 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
83 $mathlist[$mathsecs] = renderMath($q[0]);
84 $stripped2 .= $unique2 . $mathsecs . "s";
89 $stripped2 = $stripped;
# Pass 3: pre sections, re-wrapped in pre tags with their content tag-escaped.
92 while ( "" != $stripped2 ) {
93 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
95 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped2 = ""; }
97 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
99 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
100 $stripped3 .= $unique3 . $presecs . "s";
105 $text = $this->doWikiPass2( $stripped3, $linestart );
# Backslash and dollar are escaped below because the stored text is passed to
# preg_replace() as the replacement string, where both characters are special.
107 $specialChars = array("\\", "$");
108 $escapedChars = array("\\\\", "\\$");
110 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
111 for ( $i = $presecs; $i >= 1; --$i ) {
112 $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
113 $escapedChars, $prelist[$i] ), $text );
116 for ( $i = $mathsecs; $i >= 1; --$i ) {
117 $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
118 $escapedChars, $mathlist[$i] ), $text );
121 for ( $i = $nwsecs; $i >= 1; --$i ) {
122 $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
123 $escapedChars, $nwlist[$i] ), $text );
126 $this->mOutput
->setText( $text );
127 wfProfileOut( $fname );
128 return $this->mOutput
;
# Builds the HTML listing appended to a category page: a "subcategories"
# heading with links to member pages that are themselves categories, and a
# "category_header" heading with links to the remaining member pages.
# Returns nothing (bare return) when $wgUseCategoryMagic is unset or false, and
# "" when the part of the current title before the first ":" is not the
# localized category name.
# NOTE(review): $wgUser is used below but is not in the global statement
# visible here; presumably it is declared on a line not shown -- confirm.
131 function categoryMagic ()
133 global $wgTitle , $wgUseCategoryMagic, $wgLang ;
134 if ( !isset ( $wgUseCategoryMagic ) ||
!$wgUseCategoryMagic ) return ;
135 $id = $wgTitle->getArticleID() ;
136 $cat = ucfirst ( wfMsg ( "category" ) ) ;
137 $ti = $wgTitle->getText() ;
138 $ti = explode ( ":" , $ti , 2 ) ;
139 if ( $cat != $ti[0] ) return "" ;
# NOTE(review): "break=all" is emitted verbatim; presumably "clear=all" was
# intended -- confirm before changing, since this is runtime output.
140 $r = "<br break=all>\n" ;
142 $articles = array() ;
# $parents is initialised but not used in the lines visible here.
143 $parents = array () ;
144 $children = array() ;
148 $sk = $wgUser->getSkin() ;
# NOTE(review): $sql is assigned twice in the visible lines; only the later
# value reaches wfQuery() unless hidden lines branch between them.
152 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
154 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
157 $res = wfQuery ( $sql, DB_READ
) ;
158 while ( $x = wfFetchObject ( $res ) )
161 # $t->newFromDBkey ( $x->l_from ) ;
162 # $t = $t->getText() ;
# Rebuild the prefixed title as "<namespace text>:<title>".
166 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
167 if ( $t != "" ) $t .= ":" ;
168 $t .= $x->cur_title
;
# Pages whose prefix equals the category name are listed as subcategories,
# labelled with the part after the colon; everything else is an article.
171 $y = explode ( ":" , $t , 2 ) ;
172 if ( count ( $y ) == 2 && $y[0] == $cat ) {
173 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
175 array_push ( $articles , $sk->makeLink ( $t ) ) ;
178 wfFreeResult ( $res ) ;
# Note that asort() orders the generated link HTML, not the bare titles.
181 if ( count ( $children ) > 0 )
183 asort ( $children ) ;
184 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
185 $r .= implode ( ", " , $children ) ;
189 if ( count ( $articles ) > 0 )
191 asort ( $articles ) ;
192 $h = wfMsg( "category_header", $ti[1] );
193 $r .= "<h2>{$h}</h2>\n" ;
194 $r .= implode ( ", " , $articles ) ;
# Builds the whitelist of HTML attribute names that user-supplied tags may
# keep. fixTagAttributes() and removeHTMLtags() both call this method.
# "width" appears twice in the list; the duplicate is harmless for the
# in_array() lookup used by fixTagAttributes().
201 function getHTMLattrs ()
203 $htmlattrs = array( # Allowed attributes--no scripting, etc.
204 "title", "align", "lang", "dir", "width", "height",
205 "bgcolor", "clear", /* BR */ "noshade", /* HR */
206 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
207 /* FONT */ "type", "start", "value", "compact",
208 /* For various lists, mostly deprecated but safe */
209 "summary", "width", "border", "frame", "rules",
210 "cellspacing", "cellpadding", "valign", "char",
211 "charoff", "colgroup", "col", "span", "abbr", "axis",
212 "headers", "scope", "rowspan", "colspan", /* Tables */
213 "id", "class", "name", "style" /* For CSS */
# Filters the attribute string $t of an HTML tag against the whitelist from
# getHTMLattrs(). Returns "" immediately when $t is blank.
# NOTE(review): the first pattern relies on the /e (eval) regex modifier, which
# PHP removed in 7.0; a port would need preg_replace_callback().
218 function fixTagAttributes ( $t )
220 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
221 $htmlattrs = $this->getHTMLattrs() ;
223 # Strip non-approved attributes from the tag
# Each name[=value] pair is kept only if the lower-cased name is whitelisted.
225 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
226 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
228 # Strip javascript "expression" from stylesheets. Brute force approach:
229 # If anything offensive is found, all attributes of the HTML tag are dropped
232 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
233 wfMungeToUtf8( $t ) ) )
# Converts pipe-style wiki table markup to HTML, one input line at a time.
# Line prefixes handled in the visible code:
#   {|  opens a table        |}  closes a table      |-  starts a new row
#   |   data cell (TD)       !   header cell (TH)    |+  caption
# Four parallel stacks ($td, $ltd, $tr, $ltr) hold one entry per currently
# open table so that nested tables are tracked independently.
241 function doTableStuff ( $t )
243 $t = explode ( "\n" , $t ) ;
244 $td = array () ; # Is currently a td tag open?
245 $ltd = array () ; # Was it TD or TH?
246 $tr = array () ; # Is currently a tr tag open?
247 $ltr = array () ; # tr attributes
248 foreach ( $t AS $k => $x )
251 $fc = substr ( $x , 0 , 1 ) ;
252 if ( "{|" == substr ( $x , 0 , 2 ) )
# NOTE(review): substr( $x , 3 ) skips three characters although the marker is
# two long, so the character right after the marker is dropped; presumably a
# separating space is assumed -- confirm.
254 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
255 array_push ( $td , false ) ;
256 array_push ( $ltd , "" ) ;
257 array_push ( $tr , false ) ;
258 array_push ( $ltr , "" ) ;
260 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
261 else if ( "|}" == substr ( $x , 0 , 2 ) )
# Close any open cell and row of the innermost table; closing tags are
# prepended to $z.
264 $l = array_pop ( $ltd ) ;
265 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
266 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
270 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
272 $z = trim ( substr ( $x , 2 ) ) ;
273 $t[$k] = "<caption>{$z}</caption>\n" ;
275 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
# Strip the leading pipe and all dashes; what remains is the row attribute text.
277 $x = substr ( $x , 1 ) ;
278 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
280 $l = array_pop ( $ltd ) ;
281 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
282 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
# The row attributes are stored in $ltr and emitted later, when the first cell
# of the row opens the tr tag.
285 array_push ( $tr , false ) ;
286 array_push ( $td , false ) ;
287 array_push ( $ltd , "" ) ;
288 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
290 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
292 if ( "|+" == substr ( $x , 0 , 2 ) )
295 $x = substr ( $x , 1 ) ;
# Several cells may share one line, separated by a double pipe (or a double
# exclamation mark on header lines, which is normalised to a double pipe first).
297 $after = substr ( $x , 1 ) ;
298 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
299 $after = explode ( "||" , $after ) ;
301 foreach ( $after AS $theline )
306 $tra = array_pop ( $ltr ) ;
307 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
308 array_push ( $tr , true ) ;
309 array_push ( $ltr , "" ) ;
312 $l = array_pop ( $ltd ) ;
313 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
314 if ( $fc == "|" ) $l = "TD" ;
315 else if ( $fc == "!" ) $l = "TH" ;
316 else if ( $fc == "+" ) $l = "CAPTION" ;
318 array_push ( $ltd , $l ) ;
# A single pipe inside a cell separates the cell attributes from its content.
319 $y = explode ( "|" , $theline , 2 ) ;
320 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
# NOTE(review): the doubled "$y = $y =" below is redundant but harmless.
321 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
323 array_push ( $td , true ) ;
328 # Closing open td, tr && table
# NOTE(review): the visible closing code always emits a td closing tag and does
# not consult $ltd, so an open TH or CAPTION would get a mismatched closing tag
# unless lines not shown handle it -- confirm.
329 while ( count ( $td ) > 0 )
331 if ( array_pop ( $td ) ) $t[] = "</td>" ;
332 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
336 $t = implode ( "\n" , $t ) ;
337 # $t = $this->removeHTMLtags( $t );
341 # Well, OK, it's actually about 14 passes. But since all the
342 # hard lifting is done inside PHP's regex code, it probably
343 # wouldn't speed things up much to add a real parser.
# Runs the main chain of text transformations on wikitext from which parse()
# has already stripped the nowiki, math and pre sections. The order of the
# passes below is significant (see the note above replaceExternalLinks()).
# $linestart is forwarded to doBlockLevels().
# NOTE(review): the profiling name still says "OutputPage::" although parse()
# in the same chunk profiles as "Parser::" -- presumably a leftover; confirm.
345 function doWikiPass2( $text, $linestart )
347 global $wgUser, $wgLang, $wgUseDynamicDates;
348 $fname = "OutputPage::doWikiPass2";
349 wfProfileIn( $fname );
351 $text = $this->removeHTMLtags( $text );
352 $text = $this->replaceVariables( $text );
# A run of four or more dashes at the start of the text or of a line becomes a
# horizontal rule; upper-case HR tags are normalised to lower case.
354 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
355 $text = str_replace ( "<HR>", "<hr>", $text );
357 $text = $this->doHeadings( $text );
358 $text = $this->doBlockLevels( $text, $linestart );
360 if($wgUseDynamicDates) {
361 global $wgDateFormatter;
362 $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
365 $text = $this->replaceExternalLinks( $text );
366 $text = $this->replaceInternalLinks ( $text );
367 $text = $this->doTableStuff ( $text ) ;
369 $text = $this->magicISBN( $text );
370 $text = $this->magicRFC( $text );
371 $text = $this->formatHeadings( $text );
373 $sk = $wgUser->getSkin();
374 $text = $sk->transformContent( $text );
# categoryMagic() may return nothing (bare return); appending that adds no text.
375 $text .= $this->categoryMagic () ;
377 wfProfileOut( $fname );
# Converts wiki headings (text wrapped in one to six equals signs at the start
# of a line) into h1..h6 elements. Levels are processed from 6 down to 1 so the
# longest run of equals signs is matched first. The heading text itself may not
# contain an equals sign.
382 /* private */ function doHeadings( $text )
384 for ( $i = 6; $i >= 1; --$i ) {
385 $h = substr( "======", 0, $i );
386 $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
387 "<h{$i}>\\1</h{$i}>\\2", $text );
392 # Note: we have to do external links before the internal ones,
393 # and otherwise take great care in the order of things here, so
394 # that we don't end up interpreting some URLs twice.
# Applies subReplaceExternalLinks() once per supported protocol. The third
# argument ($autonumber) is true only for http and https.
396 /* private */ function replaceExternalLinks( $text )
398 $fname = "OutputPage::replaceExternalLinks";
399 wfProfileIn( $fname );
400 $text = $this->subReplaceExternalLinks( $text, "http", true );
401 $text = $this->subReplaceExternalLinks( $text, "https", true );
402 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
403 $text = $this->subReplaceExternalLinks( $text, "irc", false );
404 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
405 $text = $this->subReplaceExternalLinks( $text, "news", false );
406 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
407 wfProfileOut( $fname );
# Links all URLs of one protocol inside $s.
#   $s          text to process
#   $protocol   scheme name without the colon, e.g. "http"
#   $autonumber when true, bracketed links without a caption are labelled with
#               a running number from $this->mAutonumber; it also gates the
#               inline-image handling together with $wgAllowExternalImages
# Two phases are visible: first bare URLs (not preceded by an opening bracket)
# are wrapped in anchors or image tags, then bracketed links are processed by
# splitting the text on the bracket-plus-protocol prefix.
# NOTE(review): $sep, $paren and $trail are used below but are set on lines not
# visible here.
411 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
413 global $wgUser, $printable;
414 global $wgAllowExternalImages;
# Temporary stand-in for the protocol name so that URLs linked in phase one are
# not matched again; it is swapped back to $protocol after the replacements.
417 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
418 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
420 # this is the list of separators that should be ignored if they
421 # are the last character of an URL but that should be included
422 # if they occur within the URL, e.g. "go to www.foo.com, where .."
423 # in this case, the last comma should not become part of the URL,
424 # but in "www.foo.com/123,2342,32.htm" it should.
426 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
427 $images = "gif|png|jpg|jpeg";
429 # PLEASE NOTE: The curly braces { } are not part of the regex,
430 # they are interpreted as part of the string (used to tell PHP
431 # that the content of the string should be inserted there).
# $e1 matches a bare URL ending in one of the image extensions; $e2 matches any
# other bare URL.
432 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
433 "((?i){$images})([^{$uc}]|$)/";
435 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
436 $sk = $wgUser->getSkin();
438 if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
439 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
440 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
442 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
443 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
444 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
446 $s = str_replace( $unique, $protocol, $s );
# Phase two: bracketed links. A space is prepended so that a link at the very
# start of the text still yields a non-empty first element; it is removed
# again by the substr() call.
448 $a = explode( "[{$protocol}:", " " . $s );
449 $s = array_shift( $a );
450 $s = substr( $s, 1 );
# $e1: URL directly followed by the closing bracket (no caption).
# $e2: URL, whitespace, caption text, closing bracket.
452 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
453 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
455 foreach ( $a as $line ) {
456 if ( preg_match( $e1, $line, $m ) ) {
457 $link = "{$protocol}:{$m[1]}";
459 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
460 else { $text = wfEscapeHTML( $link ); }
461 } else if ( preg_match( $e2, $line, $m ) ) {
462 $link = "{$protocol}:{$m[1]}";
# Neither pattern matched: the fragment is put back unchanged.
466 $s .= "[{$protocol}:" . $line;
# In printable mode the target URL is also shown in parentheses after the link.
469 if ( $printable == "yes") $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
471 $la = $sk->getExternalLinkAttributes( $link, $text );
472 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Handles a bold (triple-quote) token. $state is passed by reference;
# $state["strong"] holds FALSE or the token position at which bold was opened.
# When bold is open and italics were opened after it, the visible code closes
# both and reopens italics so that the generated tags stay properly nested.
478 /* private */ function handle3Quotes( &$state, $token )
480 if ( $state["strong"] ) {
481 if ( $state["em"] && $state["em"] > $state["strong"] )
483 # ''' lala ''lala '''
484 $s = "</em></strong><em>";
488 $state["strong"] = FALSE;
491 $state["strong"] = $token["pos"];
# Handles an italic (double-quote) token; mirror image of handle3Quotes().
# $state["em"] holds FALSE or the token position at which italics were opened.
# When italics are open and bold was opened after them, both are closed and
# bold is reopened to keep the tags properly nested.
496 /* private */ function handle2Quotes( &$state, $token )
498 if ( $state["em"] ) {
499 if ( $state["strong"] && $state["strong"] > $state["em"] )
501 # ''lala'''lala'' ....'''
502 $s = "</strong></em><strong>";
506 $state["em"] = FALSE;
509 $state["em"] = $token["pos"];
# Handles a five-quote token, which toggles bold and italics together.
#  - both open: close both, innermost (most recently opened) first
#  - only italics open: close italics, open bold
#  - only bold open: close bold, open italics
#  - neither open: open both at the position of this token
514 /* private */ function handle5Quotes( &$state, $token )
516 if ( $state["em"] && $state["strong"] ) {
517 if ( $state["em"] < $state["strong"] ) {
518 $s .= "</strong></em>";
520 $s .= "</em></strong>";
522 $state["strong"] = $state["em"] = FALSE;
523 } elseif ( $state["em"] ) {
524 $s .= "</em><strong>";
525 $state["em"] = FALSE;
526 $state["strong"] = $token["pos"];
527 } elseif ( $state["strong"] ) {
528 $s .= "</strong><em>";
529 $state["strong"] = FALSE;
530 $state["em"] = $token["pos"];
531 } else { # not $em and not $strong
532 $s .= "<strong><em>";
533 $state["strong"] = $state["em"] = $token["pos"];
# Token-driven pass that resolves internal links and quote markup (bold and
# italics). Text is read from a Tokenizer; tokens seen while a link is open
# are collected on $tokenStack and handed to handleInternalLink() once the
# closing token arrives. Quote tokens are delegated to handle2Quotes(),
# handle3Quotes() and handle5Quotes(), sharing $state between calls.
# NOTE(review): the case labels of the switch are on lines not visible here.
538 /* private */ function replaceInternalLinks( $str )
540 $tokenizer=Tokenizer
::newFromString( $str );
541 $tokenStack = array();
544 $state["em"] = FALSE;
545 $state["strong"] = FALSE;
548 # The tokenizer splits the text into tokens and returns them one by one.
549 # Every call to the tokenizer returns a new token.
550 while ( $token = $tokenizer->nextToken() )
552 switch ( $token["type"] )
555 # simple text with no further markup
556 $txt = $token["text"];
560 # FIXME : Treat orphaned open tags (stack not empty when text is over)
562 array_push( $tokenStack, $token );
567 # get text from stack, glue it together, and call the code to handle a
569 if ( count( $tokenStack ) == 0 )
571 # stack empty. Found a ]] without an opening [[
# Pop back to the matching opening token, rebuilding the link text in its
# original order.
575 $lastToken = array_pop( $tokenStack );
576 while ( $lastToken["type"] != "[[" )
578 $linkText = $lastToken["text"] . $linkText;
579 $lastToken = array_pop( $tokenStack );
# A directly following text token is appended as well, so that
# handleInternalLink() can see the text trailing the link.
581 $txt = $linkText ."]]";
582 $nextToken = $tokenizer->previewToken();
583 if ( $nextToken["type"] == "text" )
585 # Preview just looks at it. Now we have to fetch it.
586 $nextToken = $tokenizer->nextToken();
587 $txt .= $nextToken["text"];
589 $txt = $this->handleInternalLink( $txt );
590 #$txt = "<font color=\"#00FF00\"><b><" . $txt . "></b></font>";
592 $tagIsOpen = (count( $tokenStack ) != 0);
595 # This and the three next ones handle quotes
596 $txt = $this->handle3Quotes( $state, $token );
599 $txt = $this->handle2Quotes( $state, $token );
602 $txt = $this->handle5Quotes( $state, $token );
609 # An unknown token. Highlight.
610 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
611 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
614 # If we're parsing the interior of a link, don't append the interior to $s,
615 # but push it to the stack so it can be processed when a ]] token is found.
616 if ( $tagIsOpen && $txt != "" ) {
617 $token["type"] = "text";
618 $token["text"] = $txt;
619 array_push( $tokenStack, $token );
624 if ( count( $tokenStack ) != 0 )
626 # still objects on stack. opened [[ tag without closing ]] tag.
# Unwind the stack: text tokens contribute their text, any other token
# contributes its literal type string.
628 while ( $lastToken = array_pop( $tokenStack ) )
630 if ( $lastToken["type"] == "text" )
632 $txt = $lastToken["text"] . $txt;
634 $txt = $lastToken["type"] . $txt;
# Turns the text of one internal link into HTML. $line is what
# replaceInternalLinks() assembled: link target, optional caption after a pipe,
# the closing brackets, and any trailing text. Depending on the namespace of
# the target the visible code emits an image, a media link, a special-page
# link, a bold self-link or a normal link, and records language and category
# links instead of rendering them inline.
# NOTE(review): $tc, $sk, $e1, $new_prefix, $text and $trail are used below but
# declared or assigned on lines not visible here.
# NOTE(review): the profiling name says "OutputPage::replaceInternalLinks",
# which matches neither this method's name nor the "Parser::" prefix used by
# parse() -- presumably a leftover; confirm.
642 /* private */ function handleInternalLink( $line )
644 global $wgTitle, $wgUser, $wgLang;
645 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
646 global $wgNamespacesWithSubpages, $wgLanguageCode;
647 static $fname = "OutputPage::replaceInternalLinks" ;
648 wfProfileIn( $fname );
650 wfProfileIn( "$fname-setup" );
# The values below are computed on first use and then reused on later calls.
653 if ( !$tc ) { $tc = Title
::legalChars() . "#"; }
654 if ( !$sk ) { $sk = $wgUser->getSkin(); }
656 # Match a link having the form [[namespace:link|alternate]]trail
658 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
659 # Match the end of a line for a word that's not followed by whitespace,
660 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
661 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
662 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
663 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
666 # Special and Media are pseudo-namespaces; no pages actually exist in them
667 static $image = FALSE;
668 static $special = FALSE;
669 static $media = FALSE;
670 static $category = FALSE;
671 static $nottalk = "";
# NOTE(review): a class named "Namespace" cannot be referenced like this on
# PHP 5.3 and later, where "namespace" is a reserved keyword.
672 if ( !$image ) { $image = Namespace::getImage(); }
673 if ( !$special ) { $special = Namespace::getSpecial(); }
674 if ( !$media ) { $media = Namespace::getMedia(); }
675 if ( !$category ) { $category = wfMsg ( "category" ) ; }
676 if ( $nottalk=="" ) { $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() ); }
679 wfProfileOut( "$fname-setup" );
681 $prefix = $new_prefix;
682 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) {
688 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
691 } else { # Invalid form; output directly
692 $s .= $prefix . "[[" . $line ;
698 :Foobar -- override special treatment of prefix (images, language links)
699 /Foobar -- convert to CurrentPage/Foobar
700 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
# $c is the first character of the link target and selects the cases above.
702 $c = substr($m[1],0,1);
703 $noforce = ($c != ":");
704 if( $c == "/" ) { # subpage
705 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
706 $m[1]=substr($m[1],1,strlen($m[1])-2);
709 $noslash=substr($m[1],1);
711 if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
712 $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
715 } # this might be changed for ugliness reasons
717 $link = $noslash; # no subpage allowed, use standard link
719 } elseif( $noforce ) { # no subpage
# Leading colon: drop it and use the rest as the target.
722 $link = substr( $m[1], 1 );
727 $nt = Title
::newFromText( $link );
729 $s .= $prefix . "[[" . $line;
732 $ns = $nt->getNamespace();
733 $iw = $nt->getInterWiki();
# Interlanguage links are collected on the parser output instead of being
# rendered inline; only prefix and trail are emitted here.
735 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
736 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
737 $s .= $prefix . $trail;
740 if( $ns == $image ) {
741 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
742 $wgLinkCache->addImageLinkObj( $nt );
# A link to the current page itself is rendered as bold text, not as a link.
# NOTE(review): strpos() returns 0 when the hash sign is the first character,
# and 0 == FALSE is true, so a target starting with a hash sign is treated as
# having no fragment. A strict comparison (=== FALSE) is presumably what was
# meant -- confirm.
746 if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
747 ( strpos( $link, "#" ) == FALSE ) ) {
748 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
751 if ( $ns == $category && $wgUseCategoryMagic ) {
752 $t = explode ( ":" , $nt->getText() ) ;
754 $t = implode ( ":" , $t ) ;
755 $t = $wgLang->ucFirst ( $t ) ;
756 # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
757 $nnt = Title
::newFromText ( $category.":".$t ) ;
758 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
# NOTE(review): language links above are stored on $this->mOutput, whereas this
# writes mCategoryLinks on $this directly; presumably the ParserOutput property
# was intended -- confirm.
759 $this->mCategoryLinks
[] = $t ;
760 $s .= $prefix . $trail ;
763 if( $ns == $media ) {
764 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
765 $wgLinkCache->addImageLinkObj( $nt );
767 } elseif( $ns == $special ) {
768 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
771 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
773 wfProfileOut( $fname );
777 # Some functions here used by doBlockLevels()
# Ends the section recorded in $this->mLastSection. A closing tag is built
# only when that section is neither "" nor "p" (paragraphs are left unclosed);
# mLastSection is then reset to "".
779 /* private */ function closeParagraph()
782 if ( 0 != strcmp( "p", $this->mLastSection
) &&
783 0 != strcmp( "", $this->mLastSection
) ) {
784 $result = "</" . $this->mLastSection
. ">";
786 $this->mLastSection
= "";
789 # getCommon() returns the length of the longest common prefix
790 # of both arguments, i.e. the number of leading characters they share.
# The scan stops at the first mismatch or at the end of the shorter string.
# NOTE(review): the curly-brace string offsets used below were removed in
# PHP 8.0; square brackets are the equivalent.
792 /* private */ function getCommon( $st1, $st2 )
794 $fl = strlen( $st1 );
795 $shorter = strlen( $st2 );
796 if ( $fl < $shorter ) { $shorter = $fl; }
798 for ( $i = 0; $i < $shorter; ++
$i ) {
799 if ( $st1{$i} != $st2{$i} ) { break; }
803 # These next three functions open, continue, and close the list
804 # element appropriate to the prefix character passed into them.
# openList(): closes any open paragraph-level section, then opens the list and
# its first item for $char:
#   *  unordered list      #  ordered list
#   :  definition data     ;  definition term (also sets $this->mDTopen)
# For any other character the result built so far is replaced by an HTML
# comment marking the error.
806 /* private */ function openList( $char )
808 $result = $this->closeParagraph();
810 if ( "*" == $char ) { $result .= "<ul><li>"; }
811 else if ( "#" == $char ) { $result .= "<ol><li>"; }
812 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
813 else if ( ";" == $char ) {
814 $result .= "<dl><dt>";
815 $this->mDTopen
= true;
817 else { $result = "<!-- ERR 1 -->"; }
# Returns the markup that ends the current list item and starts the next one
# in a list of type $char. For definition lists, $this->mDTopen records
# whether a term is currently open so that it can be closed before the next
# term or definition starts. Any other character yields an error comment.
# NOTE(review): the value of $close for the case where no term is open is set
# on a line not visible here.
822 /* private */ function nextItem( $char )
824 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
825 else if ( ":" == $char ||
";" == $char ) {
827 if ( $this->mDTopen
) { $close = "</dt>"; }
828 if ( ";" == $char ) {
829 $this->mDTopen
= true;
830 return $close . "<dt>";
832 $this->mDTopen
= false;
833 return $close . "<dd>";
836 return "<!-- ERR 2 -->";
# Builds the closing markup for the innermost list of type $char. For
# definition lists the closing tag depends on whether a term is still open
# ($this->mDTopen), and the flag is cleared.
# NOTE(review): no branch for ";" is visible, so that character would reach the
# ERR 3 case unless callers normalise it to ":" first -- confirm against
# doBlockLevels().
839 /* private */function closeList( $char )
841 if ( "*" == $char ) { $text = "</li></ul>"; }
842 else if ( "#" == $char ) { $text = "</li></ol>"; }
843 else if ( ":" == $char ) {
844 if ( $this->mDTopen
) {
845 $this->mDTopen
= false;
846 $text = "</dt></dl>";
848 $text = "</dd></dl>";
851 else { return "<!-- ERR 3 -->"; }
# Line-by-line pass that produces block-level structure: nested lists from
# lines starting with the characters * # : ; and paragraph-level sections for
# the remaining lines.
#   $text      input text; it is split on newlines and rebuilt in the same variable
#   $linestart when false, the first input line is copied through untouched
# NOTE(review): $lastPref, $oLine and $inBlockElem are updated on lines not
# visible here, and the curly-brace string offsets used below were removed in
# PHP 8.0.
855 /* private */ function doBlockLevels( $text, $linestart )
857 $fname = "OutputPage::doBlockLevels";
858 wfProfileIn( $fname );
859 # Parsing through the text line by line. The main thing
860 # happening here is handling of block-level elements p, pre,
861 # and making lists from lines starting with * # : etc.
863 $a = explode( "\n", $text );
864 $text = $lastPref = "";
865 $this->mDTopen
= $inBlockElem = false;
867 if ( ! $linestart ) { $text .= array_shift( $a ); }
868 foreach ( $a as $t ) {
869 if ( "" != $text ) { $text .= "\n"; }
# $opl: length of the previous list prefix; $npl: length of this line's prefix.
# $pref2 is the prefix with ";" mapped to ":" so that terms and definitions of
# one definition list compare as the same list type.
872 $opl = strlen( $lastPref );
873 $npl = strspn( $t, "*#:;" );
874 $pref = substr( $t, 0, $npl );
875 $pref2 = str_replace( ";", ":", $pref );
876 $t = substr( $t, $npl );
# Same prefix as the previous line: just start the next item.
878 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
879 $text .= $this->nextItem( substr( $pref, -1 ) );
# A term line may carry its definition after the first colon on the same line.
881 if ( ";" == substr( $pref, -1 ) ) {
882 $cpos = strpos( $t, ":" );
883 if ( ! ( false === $cpos ) ) {
884 $term = substr( $t, 0, $cpos );
885 $text .= $term . $this->nextItem( ":" );
886 $t = substr( $t, $cpos +
1 );
# Prefix changed: close lists down to the common prefix length, then open the
# additional levels.
889 } else if (0 != $npl ||
0 != $opl) {
890 $cpl = $this->getCommon( $pref, $lastPref );
892 while ( $cpl < $opl ) {
893 $text .= $this->closeList( $lastPref{$opl-1} );
896 if ( $npl <= $cpl && $cpl > 0 ) {
897 $text .= $this->nextItem( $pref{$cpl-1} );
899 while ( $npl > $cpl ) {
900 $char = substr( $pref, $cpl, 1 );
901 $text .= $this->openList( $char );
903 if ( ";" == $char ) {
904 $cpos = strpos( $t, ":" );
905 if ( ! ( false === $cpos ) ) {
906 $term = substr( $t, 0, $cpos );
907 $text .= $term . $this->nextItem( ":" );
908 $t = substr( $t, $cpos +
1 );
915 if ( 0 == $npl ) { # No prefix--go to paragraph mode
# Lines that open a table, blockquote or heading end the current section.
917 "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
918 $text .= $this->closeParagraph();
# Outside block elements, a line starting with a space is treated differently
# from a normal paragraph line; the section chosen for it is set on a line not
# visible here.
921 if ( ! $inBlockElem ) {
922 if ( " " == $t{0} ) {
924 # $t = wfEscapeHTML( $t );
926 else { $newSection = "p"; }
# A blank line always starts a fresh section; otherwise a new opening tag is
# emitted only when the section type changes, and never for "p".
928 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
929 $text .= $this->closeParagraph();
930 $text .= "<" . $newSection . ">";
931 } else if ( 0 != strcmp( $this->mLastSection
,
933 $text .= $this->closeParagraph();
934 if ( 0 != strcmp( "p", $newSection ) ) {
935 $text .= "<" . $newSection . ">";
938 $this->mLastSection
= $newSection;
941 preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
942 $inBlockElem = false;
# After the last line: close any list that is still open ...
948 $text .= $this->closeList( $pref2{$npl-1} );
# ... and any open section other than "p".
951 if ( "" != $this->mLastSection
) {
952 if ( "p" != $this->mLastSection
) {
953 $text .= "</" . $this->mLastSection
. ">";
955 $this->mLastSection
= "";
957 wfProfileOut( $fname );
# Substitutes magic-word "variables" in $text: the current date and time
# values, the number of articles, and message inclusions handled through the
# callbacks wfReplaceMsgVar() and wfReplaceMsgnwVar().
# NOTE(review): the counters mContainsOldMagic and mContainsNewMagic are
# written on $this here, while ParserOutput in the same chunk declares
# mContainsOldMagic; presumably the output object was intended -- confirm.
961 /* private */ function replaceVariables( $text )
963 global $wgLang, $wgCurOut;
964 $fname = "OutputPage::replaceVariables";
965 wfProfileIn( $fname );
970 # See Language.php for the definition of each magic word
971 # As with sigs, this uses the server's local time -- ensure
972 # this is appropriate for your audience!
# $magic maps each magic-word id to its replacement text.
974 $magic[MAG_CURRENTMONTH
] = date( "m" );
975 $magic[MAG_CURRENTMONTHNAME
] = $wgLang->getMonthName( date("n") );
976 $magic[MAG_CURRENTMONTHNAMEGEN
] = $wgLang->getMonthNameGen( date("n") );
977 $magic[MAG_CURRENTDAY
] = date("j");
# date("w") is 0-based (Sunday = 0); getWeekdayName() is called with the value
# shifted by one.
978 $magic[MAG_CURRENTDAYNAME
] = $wgLang->getWeekdayName( date("w")+
1 );
979 $magic[MAG_CURRENTYEAR
] = date( "Y" );
980 $magic[MAG_CURRENTTIME
] = $wgLang->time( wfTimestampNow(), false );
982 $this->mContainsOldMagic +
= MagicWord
::replaceMultiple($magic, $text, $text);
984 $mw =& MagicWord
::get( MAG_NUMBEROFARTICLES
);
# The article count is only fetched when the magic word occurs in the text.
985 if ( $mw->match( $text ) ) {
986 $v = wfNumberOfArticles();
987 $text = $mw->replace( $v, $text );
988 if( $mw->getWasModified() ) { $this->mContainsOldMagic++
; }
991 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
992 # The callbacks are at the bottom of this file
994 $mw =& MagicWord
::get( MAG_MSG
);
995 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
996 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
998 $mw =& MagicWord
::get( MAG_MSGNW
);
999 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1000 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
1002 wfProfileOut( $fname );
1006 # Cleans up HTML, removes dangerous tags and attributes
# The text is split on the opening angle bracket and every fragment is checked
# against whitelists of element names. Allowed tags keep only the attributes
# approved by fixTagAttributes(); $tagstack tracks open elements so that
# closing tags can be validated and unclosed tags closed at the end.
# $tablestack saves the tag stack of the enclosing context while a table is
# open.
1007 /* private */ function removeHTMLtags( $text )
1009 $fname = "OutputPage::removeHTMLtags";
1010 wfProfileIn( $fname );
1011 $htmlpairs = array( # Tags that must be closed
1012 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1013 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1014 "strike", "strong", "tt", "var", "div", "center",
1015 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1016 "ruby", "rt" , "rb" , "rp"
1018 $htmlsingle = array(
1019 "br", "p", "hr", "li", "dt", "dd"
1021 $htmlnest = array( # Tags that can be nested--??
1022 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1023 "dl", "font", "big", "small", "sub", "sup"
1025 $tabletags = array( # Can only appear inside table
# Table-only tags are also treated as single tags, i.e. they need no closing
# tag.
1029 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1030 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
# $htmlattrs is fetched here but not referenced in the lines visible below.
1032 $htmlattrs = $this->getHTMLattrs () ;
1034 # Remove HTML comments
1035 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1037 $bits = explode( "<", $text );
1038 $text = array_shift( $bits );
1039 $tagstack = array(); $tablestack = array();
1041 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match and list() assignment,
# which would otherwise complain when the fragment does not look like a tag.
1042 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1043 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1045 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1046 error_reporting( $prev );
1049 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
# Closing tag: it must match the most recently opened element, otherwise the
# popped entry is pushed back.
1053 if ( ! in_array( $t, $htmlsingle ) &&
1054 ( $ot = array_pop( $tagstack ) ) != $t ) {
1055 array_push( $tagstack, $ot );
1058 if ( $t == "table" ) {
1059 $tagstack = array_pop( $tablestack );
1064 # Keep track for later
1065 if ( in_array( $t, $tabletags ) &&
1066 ! in_array( "table", $tagstack ) ) {
1068 } else if ( in_array( $t, $tagstack ) &&
1069 ! in_array ( $t , $htmlnest ) ) {
1071 } else if ( ! in_array( $t, $htmlsingle ) ) {
1072 if ( $t == "table" ) {
1073 array_push( $tablestack, $tagstack );
1074 $tagstack = array();
1076 array_push( $tagstack, $t );
1078 # Strip non-approved attributes from the tag
1079 $newparams = $this->fixTagAttributes($params);
# NOTE(review): as shown, search and replacement strings in the two
# str_replace() calls below are identical, which makes them no-ops; presumably
# the replacement was the HTML entity form and was decoded when this text was
# extracted -- confirm against the original file.
1083 $rest = str_replace( ">", ">", $rest );
1084 $text .= "<$slash$t $newparams$brace$rest";
1088 $text .= "<" . str_replace( ">", ">", $x);
1090 # Close off any remaining tags
1091 while ( $t = array_pop( $tagstack ) ) {
1093 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1095 wfProfileOut( $fname );
1101 * This function accomplishes several tasks:
1102 * 1) Auto-number headings if that option is enabled
1103 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1104 * 3) Add a Table of contents on the top for users who have enabled the option
1105 * 4) Auto-anchor headings
1107 * It loops through all headlines, collects the necessary data, then splits up the
1108 * string and re-inserts the newly formatted headlines.
# Post-processes the h1..h6 elements already present in $text: numbering,
# anchors, [edit] section links and the table of contents.
# User options read here:
#   $nh  "numberheadings"           $st  "showtoc"
#   $es  "editsection"              $esr "editsectiononrightclick"
# $es and $esr additionally require a non-zero user id.
# NOTE(review): many counters and accumulators ($c, $i, $toc, $toclines,
# $numbering, $dot, $full) are initialised or updated on lines not visible
# here.
1111 /* private */ function formatHeadings( $text )
1113 global $wgUser,$wgArticle,$wgTitle,$wpPreview;
1114 $nh=$wgUser->getOption( "numberheadings" );
1115 $st=$wgUser->getOption( "showtoc" );
1116 if(!$wgTitle->userCanEdit()) {
1120 $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
1121 $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
1124 # Inhibit editsection links if requested in the page
1125 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1126 if ($esw->matchAndRemove( $text )) {
1129 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1131 $mw =& MagicWord
::get( MAG_NOTOC
);
1132 if ($mw->matchAndRemove( $text ))
1137 # never add the TOC to the Main Page. This is an entry page that should not
1138 # be more than 1-2 screens large anyway
1139 if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1141 # We need this to perform operations on the HTML
1142 $sk=$wgUser->getSkin();
1144 # Get all headlines for numbering them and adding funky stuff like [edit]
# $matches[1] holds the heading level, [2] the rest of the opening tag and [3]
# the heading content.
1146 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1151 # Ugh .. the TOC should have neat indentation levels which can be
1152 # passed to the skin functions. These are determined here
1153 foreach($matches[3] as $headline) {
1154 if($level) { $prevlevel=$level;}
1155 $level=$matches[1][$c];
1156 if(($nh||
$st) && $prevlevel && $level>$prevlevel) {
1158 $h[$level]=0; // reset when we enter a new level
1159 $toc.=$sk->tocIndent($level-$prevlevel);
1160 $toclevel+
=$level-$prevlevel;
1163 if(($nh||
$st) && $level<$prevlevel) {
1164 $h[$level+
1]=0; // reset when we step back a level
1165 $toc.=$sk->tocUnindent($prevlevel-$level);
1166 $toclevel-=$prevlevel-$level;
1169 $h[$level]++
; // count number of headlines for each level
1172 for($i=1;$i<=$level;$i++
) {
1174 if($dot) {$numbering.=".";}
1181 // The canonized header is a version of the header text safe to use for links
# Derivation: strip tags, drop double quotes, trim, replace spaces with
# underscores.
1183 $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1184 $tocline = trim( $canonized_headline );
1185 $canonized_headline=str_replace('"',"",$canonized_headline);
1186 $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1187 $refer[$c]=$canonized_headline;
1188 $refers[$canonized_headline]++
; // count how many in assoc. array so we can track dupes in anchors
1189 $refcount[$c]=$refers[$canonized_headline];
1191 // Prepend the number to the heading text
1194 $tocline=$numbering ." ". $tocline;
1196 // Don't number the heading if it is the only one (looks silly)
1197 if($nh && count($matches[3]) > 1) {
1198 $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1202 // Create the anchor for linking from the TOC to the section
# Repeated headings get "_<n>" appended so that every anchor stays unique.
1204 $anchor=$canonized_headline;
1205 if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1207 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
# Edit links are suppressed whenever $wpPreview is set.
1209 if($es && !isset($wpPreview)) {
1210 $head[$c].=$sk->editSectionLink($c+
1);
1213 // Put it all together
1215 $head[$c].="<h".$level.$matches[2][$c]
1216 ."<a name=\"".$anchor."\">"
1221 // Add the edit section link
1223 if($esr && !isset($wpPreview)) {
1224 $head[$c]=$sk->editSectionScript($c+
1,$head[$c]);
1234 $toc.=$sk->tocUnindent($toclevel);
1235 $toc=$sk->tocTable($toc);
1238 // split up and insert constructed headlines
# The pattern mirrors the one used for preg_match_all() above, so the blocks
# are the pieces of text between the headings.
1240 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1243 foreach($blocks as $block) {
1244 if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
1245 # This is the [edit] link that appears for the top block of text when
1246 # section editing is enabled
1247 $full.=$sk->editSectionLink(0);
# The TOC is inserted once, with the first block, and only when $toclines
# exceeds 3.
1250 if($st && $toclines>3 && !$i) {
1251 # Let's add a top anchor just in case we want to link to the top of the page
1252 $full="<a name=\"top\"></a>".$full.$toc;
# Turns occurrences of "ISBN " followed by a number into links to the
# Booksources special page. The text is split on "ISBN "; for each fragment the
# leading run of characters from $valid is consumed as the ISBN, and dashes
# and spaces are removed to form the isbn= query value.
# Returns $text unchanged when "ISBN " does not occur.
# NOTE(review): split() was removed in PHP 7.0 (explode() is the equivalent for
# a literal separator), and the curly-brace string offsets were removed in
# PHP 8.0.
# NOTE(review): $wgLang is used below; its global declaration is presumably on
# a line not visible here -- confirm.
1262 /* private */ function magicISBN( $text )
# A space is prepended so that an occurrence at the very start still yields a
# non-empty first element; substr() removes it again below.
1266 $a = split( "ISBN ", " $text" );
1267 if ( count ( $a ) < 2 ) return $text;
1268 $text = substr( array_shift( $a ), 1);
1269 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1271 foreach ( $a as $x ) {
1272 $isbn = $blank = "" ;
1273 while ( " " == $x{0} ) {
1275 $x = substr( $x, 1 );
1277 while ( strstr( $valid, $x{0} ) != false ) {
1279 $x = substr( $x, 1 );
1281 $num = str_replace( "-", "", $isbn );
1282 $num = str_replace( " ", "", $num );
# This line puts the fragment back into the text unlinked.
1285 $text .= "ISBN $blank$x";
1287 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1288 "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1295 /* private */ function magicRFC( $text )
1305 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor (old-style, named after the class). Stores the four arguments in
# the corresponding member variables; all of them are optional.
1307 function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
1308 $containsOldMagic = false )
1310 $this->mText
= $text;
1311 $this->mLanguageLinks
= $languageLinks;
1312 $this->mCategoryLinks
= $categoryLinks;
1313 $this->mContainsOldMagic
= $containsOldMagic;
# Returns the stored text (mText).
1316 function getText() { return $this->mText
; }
# Returns the stored language links (mLanguageLinks).
1317 function getLanguageLinks() { return $this->mLanguageLinks
; }
# Returns the stored category links (mCategoryLinks).
1318 function getCategoryLinks() { return $this->mCategoryLinks
; }
# Returns the stored mContainsOldMagic value.
1319 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Sets mText through wfSetVar() and returns whatever wfSetVar() returns.
1320 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
# Sets mLanguageLinks through wfSetVar() and returns whatever wfSetVar() returns.
1321 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
# Sets mCategoryLinks through wfSetVar() and returns whatever wfSetVar() returns.
1322 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
# Sets mContainsOldMagic through wfSetVar() and returns whatever wfSetVar() returns.
1323 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
1326 # Regex callbacks, used in OutputPage::replaceVariables
1328 # Just get rid of the dangerous stuff
1329 # Necessary because replaceVariables is called after removeHTMLtags,
1330 # and message text can come from any user
# Callback for the MSG magic word. $matches[1] is the message key. The message
# text is cleaned with removeHTMLtags() and its internal links are expanded
# while the link cache is suspended; afterwards a link to the message page in
# the NS_MEDIAWIKI namespace is registered with the link cache.
# NOTE(review): the return statement is on a line not visible here.
1331 function wfReplaceMsgVar( $matches ) {
1332 global $wgCurOut, $wgLinkCache;
1333 $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
1334 $wgLinkCache->suspend();
1335 $text = $wgCurOut->replaceInternalLinks( $text );
1336 $wgLinkCache->resume();
1337 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );
1341 # Effective <nowiki></nowiki>
1342 # Not real <nowiki> because this is called after nowiki sections are processed
# Callback for the MSGNW magic word. $matches[1] is the message key. The
# message text is passed through wfEscapeWikiText() instead of being parsed,
# and a link to the message page in the NS_MEDIAWIKI namespace is registered
# with the link cache.
# NOTE(review): the return statement is on a line not visible here.
1343 function wfReplaceMsgnwVar( $matches ) {
1344 global $wgCurOut, $wgLinkCache;
1345 $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
1346 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );