4 # objects: $wgUser, $wgTitle, $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut, $wgArticle
8 # settings: $wgUseTex, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic,
9 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgUseLinkPrefixCombination
13 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
22 $this->mOutput
= new ParserOutput
;
23 $this->mAutonumber
= 0;
24 $this->mLastSection
= "";
25 $this->mDTopen
= false;
28 # First pass--strip out <nowiki>, <math> and <pre> sections, then pass the
29 # rest off to doWikiPass2() which does all the real work.
31 # Returns a ParserOutput
# Entry point: turns the wikitext in $text into HTML, stores it in
# $this->mOutput via setText() and returns $this->mOutput.
# <nowiki>, <math> and <pre> sections are cut out and replaced by placeholder
# tokens of the form "<unique string><index>s", the remaining text is run
# through doWikiPass2( ..., $linestart ), and the saved sections are then
# substituted back in place of their placeholders.
# NOTE(review): this listing is not contiguous -- the initialisation of the
# counters ($nwsecs, $mathsecs, $presecs), the accumulator strings and any
# use of $clearState are not visible here.
33 function parse( $text, $linestart = true, $clearState = true )
36 $fname = "Parser::parse";
37 wfProfileIn( $fname );
# Placeholder prefixes: $unique marks nowiki sections, $unique2 math
# sections and $unique3 pre sections (see the loops below).
38 $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
39 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
40 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
55 # Replace any instances of the placeholders
56 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
57 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
58 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
# Pass 1: peel off one <nowiki>...</nowiki> section per iteration.
60 while ( "" != $text ) {
61 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
63 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $text = ""; }
65 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
67 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
68 $stripped .= $unique . $nwsecs . "s";
# Pass 2: same scheme for <math>...</math>; the content is rendered
# immediately with renderMath().
74 while ( "" != $stripped ) {
75 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
77 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped = ""; }
79 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
81 $mathlist[$mathsecs] = renderMath($q[0]);
82 $stripped2 .= $unique2 . $mathsecs . "s";
# NOTE(review): presumably the branch taken when math handling is skipped;
# the enclosing condition is not visible -- confirm.
87 $stripped2 = $stripped;
# Pass 3: same scheme for <pre>...</pre>; the content is tag-escaped and
# re-wrapped in a literal <pre> element.
90 while ( "" != $stripped2 ) {
91 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
93 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped2 = ""; }
95 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
97 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
98 $stripped3 .= $unique3 . $presecs . "s";
103 $text = $this->doWikiPass2( $stripped3, $linestart );
# Backslash and dollar are escaped in the stored sections because they are
# used as the replacement argument of preg_replace(), where both characters
# introduce backreferences.
105 $specialChars = array("\\", "$");
106 $escapedChars = array("\\\\", "\\$");
108 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
109 for ( $i = $presecs; $i >= 1; --$i ) {
110 $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
111 $escapedChars, $prelist[$i] ), $text );
114 for ( $i = $mathsecs; $i >= 1; --$i ) {
115 $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
116 $escapedChars, $mathlist[$i] ), $text );
119 for ( $i = $nwsecs; $i >= 1; --$i ) {
120 $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
121 $escapedChars, $nwlist[$i] ), $text );
124 $this->mOutput
->setText( $text );
125 wfProfileOut( $fname );
126 return $this->mOutput
;
# Builds the extra HTML shown on a category page: a "subcategories" section
# and an article list, each a comma-separated series of skin links.
# Returns nothing when $wgUseCategoryMagic is unset or false, and "" when the
# current title text does not start with the localized "category" prefix
# followed by ":".
# NOTE(review): the final return of $r is not visible in this listing.
# NOTE(review): $wgUser is used below but is not in the visible global list;
# a separate global declaration may exist on a line that is not shown.
129 function categoryMagic ()
131 global $wgTitle , $wgUseCategoryMagic, $wgLang ;
132 if ( !isset ( $wgUseCategoryMagic ) ||
!$wgUseCategoryMagic ) return ;
133 $id = $wgTitle->getArticleID() ;
134 $cat = ucfirst ( wfMsg ( "category" ) ) ;
135 $ti = $wgTitle->getText() ;
# Split "Prefix:Rest" once; $ti[0] is compared with the category prefix and
# $ti[1] is later passed to the "category_header" message.
136 $ti = explode ( ":" , $ti , 2 ) ;
137 if ( $cat != $ti[0] ) return "" ;
138 $r = "<br break=all>\n" ;
140 $articles = array() ;
141 $parents = array () ;
142 $children = array() ;
146 $sk = $wgUser->getSkin() ;
# NOTE(review): two assignments to $sql appear back to back here; whether the
# first one is commented out or conditional in the real file cannot be seen.
150 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
152 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
155 $res = wfQuery ( $sql, DB_READ
) ;
156 while ( $x = wfFetchObject ( $res ) )
159 # $t->newFromDBkey ( $x->l_from ) ;
160 # $t = $t->getText() ;
# Rebuild the prefixed title text from namespace name and page title.
164 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
165 if ( $t != "" ) $t .= ":" ;
166 $t .= $x->cur_title
;
# Pages whose prefix equals the category prefix are listed as subcategories
# (linked with the unprefixed name as link text); the others as articles.
169 $y = explode ( ":" , $t , 2 ) ;
170 if ( count ( $y ) == 2 && $y[0] == $cat ) {
171 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
173 array_push ( $articles , $sk->makeLink ( $t ) ) ;
176 wfFreeResult ( $res ) ;
179 if ( count ( $children ) > 0 )
181 asort ( $children ) ;
182 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
183 $r .= implode ( ", " , $children ) ;
187 if ( count ( $articles ) > 0 )
189 asort ( $articles ) ;
190 $h = wfMsg( "category_header", $ti[1] );
191 $r .= "<h2>{$h}</h2>\n" ;
192 $r .= implode ( ", " , $articles ) ;
# Builds the whitelist of HTML attribute names that may survive sanitising.
# The list is consumed by fixTagAttributes() and fetched in removeHTMLtags().
# NOTE(review): the closing of the array and the return statement are not
# visible in this listing.
# NOTE(review): "width" is listed twice; the duplicate is harmless for the
# in_array() lookup used by fixTagAttributes().
199 function getHTMLattrs ()
201 $htmlattrs = array( # Allowed attributes--no scripting, etc.
202 "title", "align", "lang", "dir", "width", "height",
203 "bgcolor", "clear", /* BR */ "noshade", /* HR */
204 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
205 /* FONT */ "type", "start", "value", "compact",
206 /* For various lists, mostly deprecated but safe */
207 "summary", "width", "border", "frame", "rules",
208 "cellspacing", "cellpadding", "valign", "char",
209 "charoff", "colgroup", "col", "span", "abbr", "axis",
210 "headers", "scope", "rowspan", "colspan", /* Tables */
211 "id", "class", "name", "style" /* For CSS */
# Sanitises the attribute portion $t of an HTML tag.
# A blank $t yields "" immediately. Otherwise each name[=value] pair is
# matched and kept only if its lower-cased name is in the getHTMLattrs()
# whitelist; a second pattern looks for suspicious content in a style
# attribute ("expression", something ending in "tp://" or "tps://", "url(").
# NOTE(review): the first pattern uses the /e modifier, i.e. the replacement
# string is evaluated as PHP code. That modifier no longer exists in current
# PHP; preg_replace_callback() is the documented replacement.
# NOTE(review): the calls that wrap these patterns and the return value are
# not visible in this listing.
216 function fixTagAttributes ( $t )
218 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
219 $htmlattrs = $this->getHTMLattrs() ;
221 # Strip non-approved attributes from the tag
223 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
224 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
226 # Strip javascript "expression" from stylesheets. Brute force approach:
227 # If anything offensive is found, all attributes of the HTML tag are dropped
230 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
231 wfMungeToUtf8( $t ) ) )
# Converts wiki table markup to HTML, working line by line on $t:
#   "{|"  opens a <table> (rest of the line becomes sanitised attributes),
#   "|}"  closes the current table,
#   "|-"  starts a new row (any further dashes are skipped, the remainder
#         becomes the row's attributes),
#   "|" / "!" start TD / TH cells, with "||" (or "!!" for headers) separating
#         several cells on one line,
#   "|+"  starts a caption.
# Four parallel stacks carry the state of each nested table: $td (cell
# open?), $ltd (name of the open cell tag), $tr (row open?), $ltr (attributes
# for the next row). A new level is pushed for every "{|".
# Lines are ignored while no table is open (count($td) == 0).
# NOTE(review): in the two-part cell case the statement reads "$y = $y = ...";
# the doubled assignment is redundant but harmless.
# NOTE(review): several lines (initialisation of $z, writing $t[$k] back, the
# return) are not visible in this listing.
239 function doTableStuff ( $t )
241 $t = explode ( "\n" , $t ) ;
242 $td = array () ; # Is currently a td tag open?
243 $ltd = array () ; # Was it TD or TH?
244 $tr = array () ; # Is currently a tr tag open?
245 $ltr = array () ; # tr attributes
246 foreach ( $t AS $k => $x )
249 $fc = substr ( $x , 0 , 1 ) ;
250 if ( "{|" == substr ( $x , 0 , 2 ) )
252 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
253 array_push ( $td , false ) ;
254 array_push ( $ltd , "" ) ;
255 array_push ( $tr , false ) ;
256 array_push ( $ltr , "" ) ;
258 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
259 else if ( "|}" == substr ( $x , 0 , 2 ) )
262 $l = array_pop ( $ltd ) ;
263 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
264 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
268 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
270 $z = trim ( substr ( $x , 2 ) ) ;
271 $t[$k] = "<caption>{$z}</caption>\n" ;
273 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
275 $x = substr ( $x , 1 ) ;
276 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
278 $l = array_pop ( $ltd ) ;
279 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
280 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
283 array_push ( $tr , false ) ;
284 array_push ( $td , false ) ;
285 array_push ( $ltd , "" ) ;
286 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
288 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
290 if ( "|+" == substr ( $x , 0 , 2 ) )
293 $x = substr ( $x , 1 ) ;
295 $after = substr ( $x , 1 ) ;
296 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
297 $after = explode ( "||" , $after ) ;
299 foreach ( $after AS $theline )
304 $tra = array_pop ( $ltr ) ;
305 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
306 array_push ( $tr , true ) ;
307 array_push ( $ltr , "" ) ;
310 $l = array_pop ( $ltd ) ;
311 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
312 if ( $fc == "|" ) $l = "TD" ;
313 else if ( $fc == "!" ) $l = "TH" ;
314 else if ( $fc == "+" ) $l = "CAPTION" ;
316 array_push ( $ltd , $l ) ;
317 $y = explode ( "|" , $theline , 2 ) ;
318 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
319 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
321 array_push ( $td , true ) ;
326 # Closing open td, tr && table
327 while ( count ( $td ) > 0 )
329 if ( array_pop ( $td ) ) $t[] = "</td>" ;
330 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
334 $t = implode ( "\n" , $t ) ;
335 # $t = $this->removeHTMLtags( $t );
339 # Well, OK, it's actually about 14 passes. But since all the
340 # hard lifting is done inside PHP's regex code, it probably
341 # wouldn't speed things up much to add a real parser.
# Main conversion pipeline, called by parse() on text whose nowiki/math/pre
# sections have already been replaced by placeholders.
# The passes run in this fixed order: HTML sanitising, variable replacement,
# horizontal rules, quotes (bold/italic), headings, block levels, optional
# date reformatting, external links, internal links, tables, ISBN and RFC
# magic, heading formatting, skin content transform, and finally the
# category listing from categoryMagic() is appended.
# $linestart is only forwarded to doBlockLevels().
# NOTE(review): the profiling label still says "OutputPage::"; the return of
# $text is not visible in this listing.
343 function doWikiPass2( $text, $linestart )
345 global $wgUser, $wgLang, $wgUseDynamicDates;
346 $fname = "OutputPage::doWikiPass2";
347 wfProfileIn( $fname );
349 $text = $this->removeHTMLtags( $text );
350 $text = $this->replaceVariables( $text );
# Four or more dashes at the start of a line become <hr>; an upper-case
# <HR> already in the text is normalised to lower case.
352 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
353 $text = str_replace ( "<HR>", "<hr>", $text );
355 $text = $this->doAllQuotes( $text );
356 $text = $this->doHeadings( $text );
357 $text = $this->doBlockLevels( $text, $linestart );
359 if($wgUseDynamicDates) {
360 global $wgDateFormatter;
361 $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
364 $text = $this->replaceExternalLinks( $text );
365 $text = $this->replaceInternalLinks ( $text );
366 $text = $this->doTableStuff ( $text ) ;
368 $text = $this->magicISBN( $text );
369 $text = $this->magicRFC( $text );
370 $text = $this->formatHeadings( $text );
372 $sk = $wgUser->getSkin();
373 $text = $sk->transformContent( $text );
374 $text .= $this->categoryMagic () ;
376 wfProfileOut( $fname );
# Applies doQuotes() to every line of $text and re-joins the results, so that
# quote markup never spans a line break.
# Lines are split on "\r\n" and each processed line gets "\r\n" appended.
# NOTE(review): because the separator is CRLF, text that uses bare "\n" line
# endings is handed to doQuotes() as one single line -- confirm intended.
# NOTE(review): the initialisation and return of $outtext are not visible.
380 /* private */ function doAllQuotes( $text )
383 $lines = explode( "\r\n", $text );
384 foreach ( $lines as $line ) {
385 $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
# Recursive converter for '' (emphasis) and ''' (strong) quote markup.
#   $pre  -- already-processed text that still belongs inside the tag that
#            is currently being assembled
#   $text -- remaining unprocessed text
#   $mode -- current state: "" (no tag open), "em", "strong", or one of the
#            combined states "emstrong", "strongem", "both"
# The text is split at the first '' (ungreedy match). If a third apostrophe
# follows, the token is treated as ''' and handled by the first group of
# branches; otherwise it is '' and handled by the second group. When no ''
# is left, the remaining text is wrapped according to $mode so that tags
# still open at the end of the line are closed.
# NOTE(review): some branch lines (plain else cases, the "" mode of the
# final section) are not visible in this listing.
390 /* private */ function doQuotes( $pre, $text, $mode )
392 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
393 $m1_strong = ($m[1] == "") ?
"" : "<strong>{$m[1]}</strong>";
394 $m1_em = ($m[1] == "") ?
"" : "<em>{$m[1]}</em>";
# A third apostrophe right after the match means the token was '''.
395 if ( substr ($m[2], 0, 1) == "'" ) {
396 $m[2] = substr ($m[2], 1);
398 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "emstrong" );
399 } else if ($mode == "strong") {
400 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
401 } else if (($mode == "emstrong") ||
($mode == "both")) {
402 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
403 } else if ($mode == "strongem") {
404 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
406 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
409 if ($mode == "strong") {
410 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "strongem" );
411 } else if ($mode == "em") {
412 return $m1_em . $this->doQuotes ( "", $m[2], "" );
413 } else if ($mode == "emstrong") {
414 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
415 } else if (($mode == "strongem") ||
($mode == "both")) {
416 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
418 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
# No quote token left: close whatever is still open.
422 $text_strong = ($text == "") ?
"" : "<strong>{$text}</strong>";
423 $text_em = ($text == "") ?
"" : "<em>{$text}</em>";
426 } else if ($mode == "em") {
427 return $pre . $text_em;
428 } else if ($mode == "strong") {
429 return $pre . $text_strong;
430 } else if ($mode == "strongem") {
431 return (($pre == "") && ($text == "")) ?
"" : "<strong>{$pre}{$text_em}</strong>";
433 return (($pre == "") && ($text == "")) ?
"" : "<em>{$pre}{$text_strong}</em>";
# Converts "== Heading ==" style lines into <hN>...</hN> elements.
# For N from 6 down to 1, a line that starts with N "=" characters, followed
# by text containing no "=", followed by N "=" and whitespace or end of line
# is rewritten as <hN>text</hN>; the trailing whitespace is preserved.
# Going from 6 to 1 presumably lets the longest marker win before shorter
# ones are tried.
# NOTE(review): the return of $text is not visible in this listing.
438 /* private */ function doHeadings( $text )
440 for ( $i = 6; $i >= 1; --$i ) {
441 $h = substr( "======", 0, $i );
442 $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
443 "<h{$i}>\\1</h{$i}>\\2", $text );
448 # Note: we have to do external links before the internal ones,
449 # and otherwise take great care in the order of things here, so
450 # that we don't end up interpreting some URLs twice.
# Converts external URLs in $text by calling subReplaceExternalLinks() once
# per supported protocol: http, https, ftp, irc, gopher, news, mailto.
# The third argument (autonumber) is true only for http and https.
# NOTE(review): the return of $text is not visible in this listing.
452 /* private */ function replaceExternalLinks( $text )
454 $fname = "OutputPage::replaceExternalLinks";
455 wfProfileIn( $fname );
456 $text = $this->subReplaceExternalLinks( $text, "http", true );
457 $text = $this->subReplaceExternalLinks( $text, "https", true );
458 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
459 $text = $this->subReplaceExternalLinks( $text, "irc", false );
460 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
461 $text = $this->subReplaceExternalLinks( $text, "news", false );
462 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
463 wfProfileOut( $fname );
# Converts the external links of one protocol in $s.
#   $s          -- text to process
#   $protocol   -- URL scheme without the colon, e.g. "http"
#   $autonumber -- when true, bracketed links without a description are
#                  labelled "[n]" using the running $this->mAutonumber
#                  counter; it also gates inline image handling together
#                  with $wgAllowExternalImages
# Step 1 handles free-standing URLs (not preceded by "["): image URLs may
# become image tags, other URLs become <a> elements. While doing so the
# protocol is written as $unique and swapped back afterwards, presumably so
# that freshly generated links are not matched a second time.
# Step 2 splits the text at "[protocol:" and handles bracketed links, either
# "[url]" or "[url description]"; segments matching neither form are put
# back unchanged.
# NOTE(review): $sep is interpolated into several patterns but its
# definition is not visible in this listing; likewise the assignments of
# $text/$trail/$paren in the second branch and the final return.
467 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
469 global $wgUser, $printable;
470 global $wgAllowExternalImages;
473 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
474 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
476 # this is the list of separators that should be ignored if they
477 # are the last character of an URL but that should be included
478 # if they occur within the URL, e.g. "go to www.foo.com, where .."
479 # in this case, the last comma should not become part of the URL,
480 # but in "www.foo.com/123,2342,32.htm" it should.
482 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
483 $images = "gif|png|jpg|jpeg";
485 # PLEASE NOTE: The curly braces { } are not part of the regex,
486 # they are interpreted as part of the string (used to tell PHP
487 # that the content of the string should be inserted there).
488 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
489 "((?i){$images})([^{$uc}]|$)/";
491 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
492 $sk = $wgUser->getSkin();
494 if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
495 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
496 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
498 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
499 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
500 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
502 $s = str_replace( $unique, $protocol, $s );
# A space is prepended before splitting and removed again from the first
# piece, so a link at the very start of the text is still split off.
504 $a = explode( "[{$protocol}:", " " . $s );
505 $s = array_shift( $a );
506 $s = substr( $s, 1 );
# $e1: "[url]" form; $e2: "[url description]" form. Both are reused names
# for new patterns.
508 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
509 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
511 foreach ( $a as $line ) {
512 if ( preg_match( $e1, $line, $m ) ) {
513 $link = "{$protocol}:{$m[1]}";
515 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
516 else { $text = wfEscapeHTML( $link ); }
517 } else if ( preg_match( $e2, $line, $m ) ) {
518 $link = "{$protocol}:{$m[1]}";
522 $s .= "[{$protocol}:" . $line;
525 if ( $printable == "yes") $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
527 $la = $sk->getExternalLinkAttributes( $link, $text );
528 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Converts [[...]] internal links in $s.
# The text is split at "[[" and every piece is matched against $e1
# ("target|alternate]]trail"). Depending on the resolved Title the piece
# becomes: an interlanguage link (recorded in $this->mOutput->mLanguageLinks,
# nothing rendered), an inline image, bold text for a self link, a category
# link (recorded, nothing rendered), a media link, a special-page link or a
# normal link via the skin. Pieces that do not match are emitted unchanged
# with their "[[" restored.
# A leading ":" forces normal link treatment, a leading "/" requests a
# subpage link (see the inline notes further down).
# NOTE(review): the self-link test uses "strpos( $link, "#" ) == FALSE".
# With a loose comparison a "#" at position 0 (strpos returns 0) is treated
# the same as "not found"; "=== FALSE" would distinguish the two.
# NOTE(review): category links are appended to $this->mCategoryLinks while
# language links go to $this->mOutput->mLanguageLinks -- confirm which
# object is meant to own the category list.
# NOTE(review): many lines (assignment of $text/$trail/$new_prefix, the
# "continue" statements, the return) are not visible in this listing.
534 /* private */ function replaceInternalLinks( $s )
536 global $wgTitle, $wgUser, $wgLang;
537 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
538 global $wgNamespacesWithSubpages, $wgLanguageCode;
539 wfProfileIn( $fname = "OutputPage::replaceInternalLinks" );
541 wfProfileIn( "$fname-setup" );
542 $tc = Title
::legalChars() . "#";
543 $sk = $wgUser->getSkin();
# A space is prepended before splitting and removed again from the first
# piece, so a link at the very start of the text is still split off.
545 $a = explode( "[[", " " . $s );
546 $s = array_shift( $a );
547 $s = substr( $s, 1 );
549 # Match a link having the form [[namespace:link|alternate]]trail
550 $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD";
551 # Match the end of a line for a word that's not followed by whitespace,
552 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
553 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
554 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
555 $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
558 # Special and Media are pseudo-namespaces; no pages actually exist in them
559 $image = Namespace::getImage();
560 $special = Namespace::getSpecial();
561 $media = Namespace::getMedia();
562 $category = wfMsg ( "category" ) ;
563 $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() );
565 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
572 wfProfileOut( "$fname-setup" );
574 foreach ( $a as $line ) {
575 $prefix = $new_prefix;
576 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) {
582 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
585 } else { # Invalid form; output directly
586 $s .= $prefix . "[[" . $line ;
592 :Foobar -- override special treatment of prefix (images, language links)
593 /Foobar -- convert to CurrentPage/Foobar
594 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
596 $c = substr($m[1],0,1);
597 $noforce = ($c != ":");
598 if( $c == "/" ) { # subpage
599 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
600 $m[1]=substr($m[1],1,strlen($m[1])-2);
603 $noslash=substr($m[1],1);
605 if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
606 $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
609 } # this might be changed for ugliness reasons
611 $link = $noslash; # no subpage allowed, use standard link
613 } elseif( $noforce ) { # no subpage
616 $link = substr( $m[1], 1 );
621 $nt = Title
::newFromText( $link );
623 $s .= $prefix . "[[" . $line;
626 $ns = $nt->getNamespace();
627 $iw = $nt->getInterWiki();
629 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
630 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
631 $s .= $prefix . $trail;
634 if( $ns == $image ) {
635 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
636 $wgLinkCache->addImageLinkObj( $nt );
640 if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
641 ( strpos( $link, "#" ) == FALSE ) ) {
642 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
645 if ( $ns == $category && $wgUseCategoryMagic ) {
646 $t = explode ( ":" , $nt->getText() ) ;
648 $t = implode ( ":" , $t ) ;
649 $t = $wgLang->ucFirst ( $t ) ;
650 # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
651 $nnt = Title
::newFromText ( $category.":".$t ) ;
652 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
653 $this->mCategoryLinks
[] = $t ;
654 $s .= $prefix . $trail ;
657 if( $ns == $media ) {
658 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
659 $wgLinkCache->addImageLinkObj( $nt );
661 } elseif( $ns == $special ) {
662 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
665 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
667 wfProfileOut( $fname );
671 # Some functions here used by doBlockLevels()
# Ends the currently open section recorded in $this->mLastSection.
# A closing tag is built only when the section is neither "p" nor "" (so a
# <p> is left unclosed); afterwards mLastSection is reset to "".
# NOTE(review): the initialisation and return of $result are not visible in
# this listing; callers append the return value to their output.
673 /* private */ function closeParagraph()
676 if ( 0 != strcmp( "p", $this->mLastSection
) &&
677 0 != strcmp( "", $this->mLastSection
) ) {
678 $result = "</" . $this->mLastSection
. ">";
680 $this->mLastSection
= "";
683 # getCommon() returns the length of the longest common prefix
684 # of both arguments, i.e. the number of leading characters they share.
# Compares $st1 and $st2 character by character from the start, scanning at
# most as many characters as the shorter string has, and stops at the first
# position where they differ.
# NOTE(review): the return statement is not visible in this listing; the
# caller in doBlockLevels() uses the result as a count of shared leading
# list-prefix characters, so presumably $i is returned.
686 /* private */ function getCommon( $st1, $st2 )
688 $fl = strlen( $st1 );
689 $shorter = strlen( $st2 );
690 if ( $fl < $shorter ) { $shorter = $fl; }
692 for ( $i = 0; $i < $shorter; ++
$i ) {
693 if ( $st1{$i} != $st2{$i} ) { break; }
697 # These next three functions open, continue, and close the list
698 # element appropriate to the prefix character passed into them.
# Opens the list element that belongs to prefix character $char, after
# closing any open paragraph section:
#   "*" -> <ul><li>   "#" -> <ol><li>   ":" -> <dl><dd>   ";" -> <dl><dt>
# Opening a ";" item also sets $this->mDTopen so that the <dt> can be closed
# later.
# NOTE(review): the error branch assigns (rather than appends) to $result,
# which discards the markup returned by closeParagraph() -- confirm intended.
# NOTE(review): the return of $result is not visible in this listing.
700 /* private */ function openList( $char )
702 $result = $this->closeParagraph();
704 if ( "*" == $char ) { $result .= "<ul><li>"; }
705 else if ( "#" == $char ) { $result .= "<ol><li>"; }
706 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
707 else if ( ";" == $char ) {
708 $result .= "<dl><dt>";
709 $this->mDTopen
= true;
711 else { $result = "<!-- ERR 1 -->"; }
# Returns the markup that ends the current list item and starts the next one
# for prefix character $char.
#   "*" / "#" -> "</li><li>"
#   ";"       -> optional "</dt>", then "<dt>"; marks a <dt> as open
#   ":"       -> optional "</dt>", then "<dd>"; marks no <dt> as open
# The "</dt>" is emitted only when $this->mDTopen is set.
# Any other character yields the marker comment "<!-- ERR 2 -->".
# NOTE(review): the default value of $close (used when no <dt> is open) is
# not visible in this listing.
716 /* private */ function nextItem( $char )
718 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
719 else if ( ":" == $char ||
";" == $char ) {
721 if ( $this->mDTopen
) { $close = "</dt>"; }
722 if ( ";" == $char ) {
723 $this->mDTopen
= true;
724 return $close . "<dt>";
726 $this->mDTopen
= false;
727 return $close . "<dd>";
730 return "<!-- ERR 2 -->";
# Builds the markup that closes the list belonging to prefix character $char.
#   "*" -> </li></ul>     "#" -> </li></ol>
#   ":" -> </dt></dl> when a <dt> is still open (and clears the flag),
#          otherwise </dd></dl>
# Any other character returns the marker comment "<!-- ERR 3 -->".
# NOTE(review): the return of $text is not visible in this listing.
733 /* private */function closeList( $char )
735 if ( "*" == $char ) { $text = "</li></ul>"; }
736 else if ( "#" == $char ) { $text = "</li></ol>"; }
737 else if ( ":" == $char ) {
738 if ( $this->mDTopen
) {
739 $this->mDTopen
= false;
740 $text = "</dt></dl>";
742 $text = "</dd></dl>";
745 else { return "<!-- ERR 3 -->"; }
# Line-by-line pass that produces block-level structure: lists from lines
# starting with * # : ; and paragraph / preformatted sections for the rest.
#   $text      -- text to process (split on "\n")
#   $linestart -- when false, the first line is copied through untouched
# For each line the run of leading list characters is its prefix. With ";"
# normalised to ":" ($pref2), the prefix is compared with the previous one:
#   * same prefix            -> nextItem()
#   * different prefix       -> close lists down to the common part
#                               (getCommon()), then open the new levels
# For ";" items, text up to the first ":" is emitted as the term and the
# rest becomes the definition.
# Lines without a prefix are handled in paragraph mode: block elements
# (table, blockquote, h1-h6) suspend paragraph handling until their closing
# tag is seen; a leading space selects a section other than "p".
# $this->mLastSection tracks the open section and is closed at the end.
# NOTE(review): many lines (updates of $opl/$cpl inside the loops, $oLine,
# $lastPref, the "pre" branch, appending $t, the return) are not visible in
# this listing.
749 /* private */ function doBlockLevels( $text, $linestart )
751 $fname = "OutputPage::doBlockLevels";
752 wfProfileIn( $fname );
753 # Parsing through the text line by line. The main thing
754 # happening here is handling of block-level elements p, pre,
755 # and making lists from lines starting with * # : etc.
757 $a = explode( "\n", $text );
758 $text = $lastPref = "";
759 $this->mDTopen
= $inBlockElem = false;
761 if ( ! $linestart ) { $text .= array_shift( $a ); }
762 foreach ( $a as $t ) {
763 if ( "" != $text ) { $text .= "\n"; }
# $opl: length of the previous prefix, $npl: length of this line's prefix.
766 $opl = strlen( $lastPref );
767 $npl = strspn( $t, "*#:;" );
768 $pref = substr( $t, 0, $npl );
769 $pref2 = str_replace( ";", ":", $pref );
770 $t = substr( $t, $npl );
772 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
773 $text .= $this->nextItem( substr( $pref, -1 ) );
775 if ( ";" == substr( $pref, -1 ) ) {
776 $cpos = strpos( $t, ":" );
777 if ( ! ( false === $cpos ) ) {
778 $term = substr( $t, 0, $cpos );
779 $text .= $term . $this->nextItem( ":" );
780 $t = substr( $t, $cpos +
1 );
783 } else if (0 != $npl ||
0 != $opl) {
784 $cpl = $this->getCommon( $pref, $lastPref );
786 while ( $cpl < $opl ) {
787 $text .= $this->closeList( $lastPref{$opl-1} );
790 if ( $npl <= $cpl && $cpl > 0 ) {
791 $text .= $this->nextItem( $pref{$cpl-1} );
793 while ( $npl > $cpl ) {
794 $char = substr( $pref, $cpl, 1 );
795 $text .= $this->openList( $char );
797 if ( ";" == $char ) {
798 $cpos = strpos( $t, ":" );
799 if ( ! ( false === $cpos ) ) {
800 $term = substr( $t, 0, $cpos );
801 $text .= $term . $this->nextItem( ":" );
802 $t = substr( $t, $cpos +
1 );
809 if ( 0 == $npl ) { # No prefix--go to paragraph mode
811 "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
812 $text .= $this->closeParagraph();
815 if ( ! $inBlockElem ) {
816 if ( " " == $t{0} ) {
818 # $t = wfEscapeHTML( $t );
820 else { $newSection = "p"; }
822 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
823 $text .= $this->closeParagraph();
824 $text .= "<" . $newSection . ">";
825 } else if ( 0 != strcmp( $this->mLastSection
,
827 $text .= $this->closeParagraph();
828 if ( 0 != strcmp( "p", $newSection ) ) {
829 $text .= "<" . $newSection . ">";
832 $this->mLastSection
= $newSection;
835 preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
836 $inBlockElem = false;
842 $text .= $this->closeList( $pref2{$npl-1} );
# Close a section left open at the end of the text; as in closeParagraph(),
# a "p" section gets no closing tag.
845 if ( "" != $this->mLastSection
) {
846 if ( "p" != $this->mLastSection
) {
847 $text .= "</" . $this->mLastSection
. ">";
849 $this->mLastSection
= "";
851 wfProfileOut( $fname );
# Substitutes magic-word "variables" in $text.
# 1. Date/time words (current month, month name, genitive month name, day,
#    weekday name, year, time) are computed once and replaced in one call to
#    MagicWord::replaceMultiple(); its result is added to
#    $this->mContainsOldMagic.
# 2. The number-of-articles word is replaced when it matches.
# 3. The MSG and MSGNW words are replaced through the callbacks
#    wfReplaceMsgVar / wfReplaceMsgnwVar; $this->mContainsNewMagic is
#    incremented when a replacement happened.
# NOTE(review): mContainsOldMagic / mContainsNewMagic are not among the
# members in the "var" declaration visible for this class, while
# ParserOutput does declare mContainsOldMagic -- confirm which object should
# carry these counters.
# NOTE(review): the return of $text is not visible in this listing.
855 /* private */ function replaceVariables( $text )
857 global $wgLang, $wgCurOut;
858 $fname = "OutputPage::replaceVariables";
859 wfProfileIn( $fname );
864 # See Language.php for the definition of each magic word
865 # As with sigs, this uses the server's local time -- ensure
866 # this is appropriate for your audience!
868 $magic[MAG_CURRENTMONTH
] = date( "m" );
869 $magic[MAG_CURRENTMONTHNAME
] = $wgLang->getMonthName( date("n") );
870 $magic[MAG_CURRENTMONTHNAMEGEN
] = $wgLang->getMonthNameGen( date("n") );
871 $magic[MAG_CURRENTDAY
] = date("j");
# date("w") is 0-based (0 = Sunday); getWeekdayName() is given the value
# plus one.
872 $magic[MAG_CURRENTDAYNAME
] = $wgLang->getWeekdayName( date("w")+
1 );
873 $magic[MAG_CURRENTYEAR
] = date( "Y" );
874 $magic[MAG_CURRENTTIME
] = $wgLang->time( wfTimestampNow(), false );
876 $this->mContainsOldMagic +
= MagicWord
::replaceMultiple($magic, $text, $text);
878 $mw =& MagicWord
::get( MAG_NUMBEROFARTICLES
);
879 if ( $mw->match( $text ) ) {
880 $v = wfNumberOfArticles();
881 $text = $mw->replace( $v, $text );
882 if( $mw->getWasModified() ) { $this->mContainsOldMagic++
; }
885 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
886 # The callbacks are at the bottom of this file
888 $mw =& MagicWord
::get( MAG_MSG
);
889 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
890 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
892 $mw =& MagicWord
::get( MAG_MSGNW
);
893 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
894 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
896 wfProfileOut( $fname );
900 # Cleans up HTML, removes dangerous tags and attributes
# Sanitises HTML embedded in $text.
# HTML comments are removed first. The text is then split at "<" and every
# fragment is parsed as "optional slash, tag name, parameters, closing
# bracket, trailing text". Tags whose lower-cased name is in the allowed
# element list are re-emitted with their attributes filtered through
# fixTagAttributes(); other fragments are appended with a literal "<".
# $tagstack tracks open tags so that closing tags must match the most
# recently opened one and non-nestable tags cannot be opened twice;
# $tablestack saves the enclosing tag stack while inside a <table>.
# Table-only tags are accepted only while "table" is on the stack. Tags
# still open at the end are popped in a final loop.
# NOTE(review): as shown, both str_replace( ">", ">", ... ) calls have
# identical search and replacement strings and therefore do nothing; the
# replacement was probably an HTML entity lost when this listing was
# produced -- confirm against the real file.
# NOTE(review): several lines (start of $htmlsingle, contents of $tabletags,
# the $badtag handling, the emitted closing tags, the return) are not
# visible in this listing.
901 /* private */ function removeHTMLtags( $text )
903 $fname = "OutputPage::removeHTMLtags";
904 wfProfileIn( $fname );
905 $htmlpairs = array( # Tags that must be closed
906 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
907 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
908 "strike", "strong", "tt", "var", "div", "center",
909 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
910 "ruby", "rt" , "rb" , "rp"
913 "br", "p", "hr", "li", "dt", "dd"
915 $htmlnest = array( # Tags that can be nested--??
916 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
917 "dl", "font", "big", "small", "sub", "sup"
919 $tabletags = array( # Can only appear inside table
923 $htmlsingle = array_merge( $tabletags, $htmlsingle );
924 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
926 $htmlattrs = $this->getHTMLattrs () ;
928 # Remove HTML comments
929 $text = preg_replace( "/<!--.*-->/sU", "", $text );
931 $bits = explode( "<", $text );
932 $text = array_shift( $bits );
933 $tagstack = array(); $tablestack = array();
935 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match/list() pair and the
# previous error level is restored right after.
936 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
937 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
939 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
940 error_reporting( $prev );
943 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
947 if ( ! in_array( $t, $htmlsingle ) &&
948 ( $ot = array_pop( $tagstack ) ) != $t ) {
949 array_push( $tagstack, $ot );
952 if ( $t == "table" ) {
953 $tagstack = array_pop( $tablestack );
958 # Keep track for later
959 if ( in_array( $t, $tabletags ) &&
960 ! in_array( "table", $tagstack ) ) {
962 } else if ( in_array( $t, $tagstack ) &&
963 ! in_array ( $t , $htmlnest ) ) {
965 } else if ( ! in_array( $t, $htmlsingle ) ) {
966 if ( $t == "table" ) {
967 array_push( $tablestack, $tagstack );
970 array_push( $tagstack, $t );
972 # Strip non-approved attributes from the tag
973 $newparams = $this->fixTagAttributes($params);
977 $rest = str_replace( ">", ">", $rest );
978 $text .= "<$slash$t $newparams$brace$rest";
982 $text .= "<" . str_replace( ">", ">", $x);
984 # Close off any remaining tags
985 while ( $t = array_pop( $tagstack ) ) {
987 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
989 wfProfileOut( $fname );
995 * This function accomplishes several tasks:
996 * 1) Auto-number headings if that option is enabled
997 * 2) Add an [edit] link to sections for logged in users who have enabled the option
998 * 3) Add a Table of contents on the top for users who have enabled the option
999 * 4) Auto-anchor headings
1001 * It loops through all headlines, collects the necessary data, then splits up the
1002 * string and re-inserts the newly formatted headlines.
# Post-processes the <h1>..<h6> elements already present in $text.
# User options read here: "numberheadings" ($nh), "showtoc" ($st),
# "editsection" ($es) and "editsectiononrightclick" ($esr); the latter two
# require a logged-in user (non-zero getID()). The NOEDITSECTION and NOTOC
# magic words are matched and removed from the text, and the table of
# contents is never shown on the page named by the "mainpage" message.
# All headings are collected with preg_match_all(); for each one the
# function maintains per-level counters, builds a dotted section number,
# derives an anchor from the heading text (HTML stripped, double quotes
# removed, spaces turned into "_", a "_n" suffix for duplicates), adds a
# TOC line and assembles the replacement heading in $head[]. The text is
# then split at the original headings and re-joined with the new ones; the
# TOC and a "top" anchor are added when enabled and more than three TOC
# lines exist.
# Edit links are suppressed while $wpPreview is set.
# NOTE(review): many lines (counter initialisation, $toclines, the bodies of
# the magic-word branches, the reassembly of $full, the return) are not
# visible in this listing.
1005 /* private */ function formatHeadings( $text )
1007 global $wgUser,$wgArticle,$wgTitle,$wpPreview;
1008 $nh=$wgUser->getOption( "numberheadings" );
1009 $st=$wgUser->getOption( "showtoc" );
1010 if(!$wgTitle->userCanEdit()) {
1014 $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
1015 $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
1018 # Inhibit editsection links if requested in the page
1019 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1020 if ($esw->matchAndRemove( $text )) {
1023 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1025 $mw =& MagicWord
::get( MAG_NOTOC
);
1026 if ($mw->matchAndRemove( $text ))
1031 # never add the TOC to the Main Page. This is an entry page that should not
1032 # be more than 1-2 screens large anyway
1033 if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1035 # We need this to perform operations on the HTML
1036 $sk=$wgUser->getSkin();
1038 # Get all headlines for numbering them and adding funky stuff like [edit]
1040 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1045 # Ugh .. the TOC should have neat indentation levels which can be
1046 # passed to the skin functions. These are determined here
1047 foreach($matches[3] as $headline) {
1048 if($level) { $prevlevel=$level;}
1049 $level=$matches[1][$c];
1050 if(($nh||
$st) && $prevlevel && $level>$prevlevel) {
1052 $h[$level]=0; // reset when we enter a new level
1053 $toc.=$sk->tocIndent($level-$prevlevel);
1054 $toclevel+
=$level-$prevlevel;
1057 if(($nh||
$st) && $level<$prevlevel) {
1058 $h[$level+
1]=0; // reset when we step back a level
1059 $toc.=$sk->tocUnindent($prevlevel-$level);
1060 $toclevel-=$prevlevel-$level;
1063 $h[$level]++
; // count number of headlines for each level
1066 for($i=1;$i<=$level;$i++
) {
1068 if($dot) {$numbering.=".";}
1075 // The canonized header is a version of the header text safe to use for links
1077 $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1078 $tocline = trim( $canonized_headline );
1079 $canonized_headline=str_replace('"',"",$canonized_headline);
1080 $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1081 $refer[$c]=$canonized_headline;
1082 $refers[$canonized_headline]++
; // count how many in assoc. array so we can track dupes in anchors
1083 $refcount[$c]=$refers[$canonized_headline];
1085 // Prepend the number to the heading text
1088 $tocline=$numbering ." ". $tocline;
1090 // Don't number the heading if it is the only one (looks silly)
1091 if($nh && count($matches[3]) > 1) {
1092 $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1096 // Create the anchor for linking from the TOC to the section
1098 $anchor=$canonized_headline;
1099 if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1101 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1103 if($es && !isset($wpPreview)) {
1104 $head[$c].=$sk->editSectionLink($c+
1);
1107 // Put it all together
1109 $head[$c].="<h".$level.$matches[2][$c]
1110 ."<a name=\"".$anchor."\">"
1115 // Add the edit section link
1117 if($esr && !isset($wpPreview)) {
1118 $head[$c]=$sk->editSectionScript($c+
1,$head[$c]);
1128 $toc.=$sk->tocUnindent($toclevel);
1129 $toc=$sk->tocTable($toc);
1132 // split up and insert constructed headlines
1134 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1137 foreach($blocks as $block) {
1138 if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
1139 # This is the [edit] link that appears for the top block of text when
1140 # section editing is enabled
1141 $full.=$sk->editSectionLink(0);
1144 if($st && $toclines>3 && !$i) {
1145 # Let's add a top anchor just in case we want to link to the top of the page
1146 $full="<a name=\"top\"></a>".$full.$toc;
# Turns "ISBN <number>" occurrences in $text into links to the Booksources
# special page.
# The text is split at "ISBN "; when there is no occurrence it is returned
# unchanged. For every following piece, leading blanks are skipped, then
# characters from $valid (digits, "-", upper-case letters) are consumed as
# the ISBN. Dashes and spaces are removed to form the number used in the
# "isbn=" query, while the link text keeps the ISBN as written. One visible
# branch re-emits the piece as plain "ISBN ..." text instead of a link.
# NOTE(review): split() is the old POSIX-regex splitter, which current PHP
# no longer provides; explode() does the same for this fixed separator.
# NOTE(review): $wgLang is used but no global declaration is visible; the
# accumulation into $blank/$isbn, the branch condition and the return are
# also not visible in this listing.
# NOTE(review): $x{0} on an empty remainder is not guarded in the visible
# loops -- confirm behaviour when "ISBN " ends the text.
1156 /* private */ function magicISBN( $text )
1160 $a = split( "ISBN ", " $text" );
1161 if ( count ( $a ) < 2 ) return $text;
1162 $text = substr( array_shift( $a ), 1);
1163 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1165 foreach ( $a as $x ) {
1166 $isbn = $blank = "" ;
1167 while ( " " == $x{0} ) {
1169 $x = substr( $x, 1 );
1171 while ( strstr( $valid, $x{0} ) != false ) {
1173 $x = substr( $x, 1 );
1175 $num = str_replace( "-", "", $isbn );
1176 $num = str_replace( " ", "", $num );
1179 $text .= "ISBN $blank$x";
1181 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1182 "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1189 /* private */ function magicRFC( $text )
# Members of the parse-result holder: the rendered text, the collected
# language links and category links, and a flag/counter for old-style magic
# words.
1199 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor: stores the four arguments in the corresponding members.
# Defaults are an empty text, empty link arrays and false.
1201 function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
1202 $containsOldMagic = false )
1204 $this->mText
= $text;
1205 $this->mLanguageLinks
= $languageLinks;
1206 $this->mCategoryLinks
= $categoryLinks;
1207 $this->mContainsOldMagic
= $containsOldMagic;
# Getters: return the member as stored.
1210 function getText() { return $this->mText
; }
1211 function getLanguageLinks() { return $this->mLanguageLinks
; }
1212 function getCategoryLinks() { return $this->mCategoryLinks
; }
1213 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Setters: delegate to wfSetVar() and return its result.
# NOTE(review): wfSetVar() is defined elsewhere; presumably it assigns the
# new value and returns the previous one -- confirm.
1214 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
1215 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
1216 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
1217 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
1220 # Regex callbacks, used in OutputPage::replaceVariables
1222 # Just get rid of the dangerous stuff
1223 # Necessary because replaceVariables is called after removeHTMLtags,
1224 # and message text can come from any user
# Callback for the MSG magic word (registered in replaceVariables()).
# $matches[1] is the message name. The message text is fetched with wfMsg(),
# sanitised with removeHTMLtags() and run through replaceInternalLinks()
# while the link cache is suspended; afterwards the message page itself
# (NS_MEDIAWIKI namespace) is registered with the link cache.
# NOTE(review): the return of $text is not visible in this listing.
1225 function wfReplaceMsgVar( $matches ) {
1226 global $wgCurOut, $wgLinkCache;
1227 $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
1228 $wgLinkCache->suspend();
1229 $text = $wgCurOut->replaceInternalLinks( $text );
1230 $wgLinkCache->resume();
1231 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );
1235 # Effective <nowiki></nowiki>
1236 # Not real <nowiki> because this is called after nowiki sections are processed
1237 function wfReplaceMsgnwVar( $matches ) {
1238 global $wgCurOut, $wgLinkCache;
1239 $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
1240 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );