3 include_once('Tokenizer.php');
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 include_once('wikihiero.php');
11 # Processes wiki markup
13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
17 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
21 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
22 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
25 # * only within ParserOptions
28 #----------------------------------------
29 # Variable substitution O(N^2) attack
30 #-----------------------------------------
31 # Without countermeasures, it would be possible to attack the parser by saving a page
32 # filled with a large number of inclusions of large pages. The size of the generated
33 # page would be proportional to the square of the input size. Hence, we limit the number
34 # of inclusions of any given page, thus bringing any attack back to O(N).
# Upper bound on how often any one page may be included during a parse; this
# is the countermeasure described in the O(N^2) note above.
37 define( "MAX_INCLUDE_REPEAT", 5 );
39 # Allowed values for $mOutputType
# parse() selects OT_HTML. NOTE(review): OT_WIKI appears to be the mode used
# for the pre-save transform, and OT_MSG is not used in the visible code --
# confirm both against the rest of the file.
40 define( "OT_HTML", 1 );
41 define( "OT_WIKI", 2 );
42 define( "OT_MSG", 3 );
44 # prefix for escaping, used in two functions at least
# Every strip marker built in extractTags() and insertStripItem() starts with
# this string.
45 define( "UNIQ_PREFIX", "NaodW29");
49 # Cleared with clearState():
50 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
51 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
54 var $mOptions, $mTitle, $mOutputType;
63 $this->mOutput
= new ParserOutput
;
64 $this->mAutonumber
= 0;
65 $this->mLastSection
= "";
66 $this->mDTopen
= false;
67 $this->mVariables
= false;
68 $this->mIncludeCount
= array();
69 $this->mStripState
= array();
70 $this->mArgStack
= array();
73 # First pass--just handle <nowiki> sections, pass the rest off
74 # to internalParse() which does all the real work.
76 # Returns a ParserOutput
78 function parse( $text, &$title, $options, $linestart = true, $clearState = true )
# Parameters as used below:
#   $text      - wiki markup to convert
#   $title     - stored by reference in $this->mTitle
#   $options   - stored in $this->mOptions
#   $linestart - forwarded to internalParse() and doBlockLevels()
#   $clearState - not referenced in the visible lines; presumably guards a
#                 clearState() call -- TODO confirm
# Returns $this->mOutput after setText() has been called with the final text.
80 $fname = "Parser::parse";
81 wfProfileIn( $fname );
87 $this->mOptions
= $options;
88 $this->mTitle
=& $title;
89 $this->mOutputType
= OT_HTML
;
# Pipeline: strip protected sections -> parse inline markup -> put the
# protected sections back -> tag clean-up -> block-level pass.
92 $text = $this->strip( $text, $this->mStripState
);
93 $text = $this->internalParse( $text, $linestart );
94 $text = $this->unstrip( $text, $this->mStripState
);
95 # Clean up special characters, only run once, next-to-last before doBlockLevels
# The pairs below are pattern => replacement entries; they are applied through
# array_keys($fixtags) / array_values($fixtags) further down.
97 "/<hr *>/i" => '<hr/>',
98 "/<br *>/i" => '<br/>',
99 "/<center *>/i"=>'<div class="center">',
100 "/<\\/center *>/i" => '</div>',
101 # Clean up spare ampersands; note that we probably ought to be
102 # more careful about named entities.
# NOTE(review): "(?!:amp;" makes the first alternative match a literal colon,
# which looks like a typo (the last alternative still covers "amp;"). As
# written the replacement is a bare ampersand, i.e. a no-op; an escaped
# ampersand entity was presumably intended -- confirm against upstream.
103 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
105 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
108 $text = $this->doBlockLevels( $text, $linestart );
110 $this->mOutput
->setText( $text );
111 wfProfileOut( $fname );
112 return $this->mOutput
;
# Returns a short lowercase hexadecimal string made from two random draws.
# It is used as the variable part of the strip markers.
/* static */ function getRandomString()
{
	# Two independent draws in [0, 0x7fffffff], each rendered as hex; the
	# first draw forms the leading part of the result.
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
120 # Replaces all occurrences of <$tag>content</$tag> in the text
121 # with a random marker and returns the new text. The output parameter
122 # $content will be an associative array filled with data of the form
123 # $unique_marker => content.
125 # If $content is already set, the additional entries will be appended
127 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
# Every marker produced by one call shares the same random stem:
# "<prefix>-<tag><random hex>", followed by a per-match counter.
128 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
# NOTE(review): $n and $stripped are used below but their initialisation is
# not among the visible lines -- confirm they are set before this loop.
135 while ( "" != $text ) {
# Split once at the next opening tag (case-insensitive, inner whitespace allowed).
136 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
# No opening tag left, or nothing follows it.
138 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) {
# Split the remainder once at the closing tag; $q[0] is the tag's content.
141 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
# The counter is zero-padded to eight upper-case hex digits.
142 $marker = $rnd . sprintf("%08X", $n++
);
143 $content[$marker] = $q[0];
144 $stripped .= $marker;
151 # Strips <nowiki>, <pre> and <math>
152 # Returns the text, and fills an array with data needed in unstrip()
153 # If the $state is already a valid strip state, it adds to the state
155 function strip( $text, &$state )
# $render is true only when the parser is producing HTML (OT_HTML).
# NOTE(review): each pair of assignments to the same $..._content[$marker]
# below looks like the two arms of an "if ( $render ) ... else ..." whose
# control lines are not visible here -- confirm. The first arm renders or
# escapes the content, the second re-wraps it in its original tag.
157 $render = ($this->mOutputType
== OT_HTML
);
158 $nowiki_content = array();
159 $hiero_content = array();
160 $math_content = array();
161 $pre_content = array();
162 $item_content = array();
164 # Replace any instances of the placeholders
165 $uniq_prefix = UNIQ_PREFIX
;
166 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
# <nowiki> sections
168 $text = Parser
::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
169 foreach( $nowiki_content as $marker => $content ){
171 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
173 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
# <hiero> sections, only when the WikiHiero extension is enabled
177 if( $GLOBALS['wgUseWikiHiero'] ){
178 $text = Parser
::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
179 foreach( $hiero_content as $marker => $content ){
181 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML
);
183 $hiero_content[$marker] = "<hiero>$content</hiero>";
# <math> sections, only when TeX rendering is enabled in the parser options
188 if( $this->mOptions
->getUseTeX() ){
189 $text = Parser
::extractTags("math", $text, $math_content, $uniq_prefix);
190 foreach( $math_content as $marker => $content ){
192 $math_content[$marker] = renderMath( $content );
194 $math_content[$marker] = "<math>$content</math>";
# <pre> sections
199 $text = Parser
::extractTags("pre", $text, $pre_content, $uniq_prefix);
200 foreach( $pre_content as $marker => $content ){
202 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
204 $pre_content[$marker] = "<pre>$content</pre>";
208 # Merge state with the pre-existing state, if there is one
# Array union (+) keeps an existing entry when a marker key is already present.
210 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
211 $state['hiero'] = $state['hiero'] +
$hiero_content;
212 $state['math'] = $state['math'] +
$math_content;
213 $state['pre'] = $state['pre'] +
$pre_content;
# Entries of a fresh state array; presumably assigned to $state when no
# earlier state exists -- the surrounding statement is not visible here.
216 'nowiki' => $nowiki_content,
217 'hiero' => $hiero_content,
218 'math' => $math_content,
219 'pre' => $pre_content,
220 'item' => $item_content
# Puts stripped content back into the text.
#
# $text  - text containing the unique markers produced while stripping
# $state - strip state: an array of dictionaries, each mapping marker => content
#
# Returns the text with every known marker replaced by its content. A state
# that is not an array (nothing was stripped yet) leaves the text unchanged.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted:
	# the dictionaries are walked last-to-first, and so are the entries of
	# each dictionary. foreach over a reversed copy avoids comparing entries
	# against false and leaves the caller's internal array pointer alone.
	foreach ( array_reverse( $state, true ) as $contentDict ) {
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( (string)$marker, $content, $text );
		}
	}
	return $text;
}
239 # Add an item to the strip state
240 # Returns the unique tag which must be inserted into the stripped text
241 # The tag will be replaced with the original text in unstrip()
243 function insertStripItem( $text, &$state )
# The marker is UNIQ_PREFIX, the literal "-item", then a random hex string.
245 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
# The text is filed under the 'item' dictionary of the strip state, keyed by
# its marker. NOTE(review): the lines between these two statements and the
# return statement are not visible here; per the header comment the marker
# is what gets returned -- confirm.
255 $state['item'][$rnd] = $text;
259 # This method generates the list of subcategories and pages for a category
260 function categoryMagic ()
262 global $wgLang , $wgUser ;
# NOTE(review): this early exit returns nothing while the next one returns "";
# the result is appended to text in internalParse(), so both act as empty.
263 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
265 $cns = Namespace::getCategory() ;
266 if ( $this->mTitle
->getNamespace() != $cns ) return "" ; # This ain't a category page
# $r accumulates the HTML produced by this method.
268 $r = "<br style=\"clear:both;\"/>\n";
271 $sk =& $wgUser->getSkin() ;
273 $articles = array() ;
274 $children = array() ;
276 $id = $this->mTitle
->getArticleID() ;
278 # For existing categories
# NOTE(review): $id is interpolated straight into the SQL; presumably it is
# always an integer article id -- confirm. The initialisation of $data is not
# among the visible lines.
280 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
281 $res = wfQuery ( $sql, DB_READ
) ;
282 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
284 # For non-existing categories
# The title key is passed through wfStrencode() before being embedded in SQL.
285 $t = wfStrencode( $this->mTitle
->getPrefixedDBKey() );
286 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
287 $res = wfQuery ( $sql, DB_READ
) ;
288 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
291 # For all pages that link to this category
292 foreach ( $data AS $x )
# Build "Namespace:Title" (no prefix when the namespace text is empty).
294 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
295 if ( $t != "" ) $t .= ":" ;
296 $t .= $x->cur_title
;
# Pages in the category namespace are subcategories, everything else is an
# ordinary member page.
298 if ( $x->cur_namespace
== $cns ) {
299 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
301 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
# NOTE(review): only the most recent $res is freed in the visible lines.
304 wfFreeResult ( $res ) ;
306 # Showing subcategories
307 if ( count ( $children ) > 0 )
309 asort ( $children ) ;
310 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
311 $r .= implode ( ", " , $children ) ;
314 # Showing pages in this category
315 if ( count ( $articles ) > 0 )
317 $ti = $this->mTitle
->getText() ;
318 asort ( $articles ) ;
319 $h = wfMsg( "category_header", $ti );
320 $r .= "<h2>{$h}</h2>\n" ;
321 $r .= implode ( ", " , $articles ) ;
# Returns the whitelist of HTML attribute names that may survive in
# user-supplied tags. fixTagAttributes() drops every attribute whose
# lower-cased name is not in this list.
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		# "width" was listed a second time here; one entry is enough.
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	) ;
	return $htmlattrs ;
}
# Filters the attribute string of an HTML/table tag down to the attributes
# whitelisted by getHTMLattrs(). $t is the raw attribute text; an empty or
# all-whitespace string yields "".
345 function fixTagAttributes ( $t )
347 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
348 $htmlattrs = $this->getHTMLattrs() ;
350 # Strip non-approved attributes from the tag
# Pattern: an attribute name, optionally followed by "=" and a bare or
# double-quoted value. NOTE(review): the pattern uses the /e (eval) modifier,
# so the replacement string is evaluated as PHP code; /e was removed in
# PHP 7.0 and needs preg_replace_callback() on current versions.
352 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
353 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
355 # Strip javascript "expression" from stylesheets. Brute force approach:
356 # If anything offensive is found, all attributes of the HTML tag are dropped
# The check matches a style attribute containing "expression", "tp"/"tps"
# followed by "://", or "url(".
359 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
360 wfMungeToUtf8( $t ) ) )
# Converts wiki table markup into HTML table tags, working line by line.
# Lines starting with "{|" open a table, "|}" close one, "|-" start a row,
# "|" / "!" introduce cells and "|+" a caption, as tested below.
# NOTE(review): several control lines of this method are not visible here;
# the comments describe only what the visible statements show.
368 function doTableStuff ( $t )
370 $t = explode ( "\n" , $t ) ;
# The four arrays are used as parallel stacks with one entry per open table
# (pushed together on "{|").
371 $td = array () ; # Is currently a td tag open?
372 $ltd = array () ; # Was it TD or TH?
373 $tr = array () ; # Is currently a tr tag open?
374 $ltr = array () ; # tr attributes
375 foreach ( $t AS $k => $x )
378 $fc = substr ( $x , 0 , 1 ) ;
379 if ( "{|" == substr ( $x , 0 , 2 ) )
# Everything from the fourth character on is treated as table attributes.
381 $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
382 array_push ( $td , false ) ;
383 array_push ( $ltd , "" ) ;
384 array_push ( $tr , false ) ;
385 array_push ( $ltr , "" ) ;
387 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
388 else if ( "|}" == substr ( $x , 0 , 2 ) )
# Close whatever row/cell is still open for this table.
391 $l = array_pop ( $ltd ) ;
392 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
393 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
397 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
399 $z = trim ( substr ( $x , 2 ) ) ;
400 $t[$k] = "<caption>{$z}</caption>\n" ;
402 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
# Drop the leading "|" and every following "-"; the rest is row attributes.
404 $x = substr ( $x , 1 ) ;
405 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
407 $l = array_pop ( $ltd ) ;
408 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
409 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
# The row tag itself is not opened yet; its attributes are remembered in $ltr
# until the first cell of the row is seen.
412 array_push ( $tr , false ) ;
413 array_push ( $td , false ) ;
414 array_push ( $ltd , "" ) ;
415 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
417 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
419 if ( "|+" == substr ( $x , 0 , 2 ) )
422 $x = substr ( $x , 1 ) ;
# Several cells may share one line, separated by "||" ("!!" for header cells).
424 $after = substr ( $x , 1 ) ;
425 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
426 $after = explode ( "||" , $after ) ;
428 foreach ( $after AS $theline )
# Open the pending row, using the attributes stored by the "|-" branch.
433 $tra = array_pop ( $ltr ) ;
434 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
435 array_push ( $tr , true ) ;
436 array_push ( $ltr , "" ) ;
# Close the previous cell, then choose the tag for the new one.
439 $l = array_pop ( $ltd ) ;
440 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
441 if ( $fc == "|" ) $l = "td" ;
442 else if ( $fc == "!" ) $l = "th" ;
443 else if ( $fc == "+" ) $l = "caption" ;
445 array_push ( $ltd , $l ) ;
# "attributes|content": a single "|" separates cell attributes from content.
446 $y = explode ( "|" , $theline , 2 ) ;
447 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
# NOTE(review): the doubled "$y = $y =" is redundant.
448 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
450 array_push ( $td , true ) ;
455 # Closing open td, tr && table
456 while ( count ( $td ) > 0 )
458 if ( array_pop ( $td ) ) $t[] = "</td>" ;
459 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
463 $t = implode ( "\n" , $t ) ;
464 # $t = $this->removeHTMLtags( $t );
# Runs the inline-level passes over already-stripped text. The visible order
# is: HTML tag clean-up, variable replacement, headings, optional date
# reformatting, external links, the tokenized parser, tables, heading
# formatting, then the skin's content transform.
# $args is forwarded to replaceVariables(); $linestart is not referenced in
# the visible lines.
468 function internalParse( $text, $linestart, $args = array() )
470 $fname = "Parser::internalParse";
471 wfProfileIn( $fname );
473 $text = $this->removeHTMLtags( $text );
474 $text = $this->replaceVariables( $text, $args );
476 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
478 $text = $this->doHeadings( $text );
479 if($this->mOptions
->getUseDynamicDates()) {
480 global $wgDateFormatter;
481 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
483 $text = $this->replaceExternalLinks( $text );
484 $text = $this->doTokenizedParser ( $text );
485 $text = $this->doTableStuff ( $text ) ;
486 $text = $this->formatHeadings( $text );
487 $sk =& $this->mOptions
->getSkin();
488 $text = $sk->transformContent( $text );
# The category listing is appended at most once per parser object: the flag
# set here is checked with isset() on later calls.
490 if ( !isset ( $this->categoryMagicDone
) ) {
491 $text .= $this->categoryMagic () ;
492 $this->categoryMagicDone
= true ;
495 wfProfileOut( $fname );
# Turns "== Title ==" style lines into <h1>..<h6> elements. A heading must
# start at the beginning of a line and be followed by whitespace or the end
# of the line.
/* private */ function doHeadings( $text )
{
	# Work from six equals signs down to one so that a longer delimiter is
	# never consumed by the pattern for a shorter one.
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
510 # Note: we have to do external links before the internal ones,
511 # and otherwise take great care in the order of things here, so
512 # that we don't end up interpreting some URLs twice.
# Applies subReplaceExternalLinks() once per supported URL protocol, in a
# fixed order, and returns the resulting text.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber flag passed to subReplaceExternalLinks().
	# The order of this table is the order in which the protocols are handled.
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
# Handles external links for a single protocol: free-standing URLs first,
# then bracketed "[protocol:... text]" links.
#   $s          - text to process
#   $protocol   - protocol name without the colon, e.g. "http"
#   $autonumber - when true, bracketed links without a description get a
#                 running number from $this->mAutonumber, and (if the options
#                 allow external images) image URLs become image tags
# NOTE(review): several lines of this method, including the definition of
# $sep and the return statement, are not visible here.
529 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
# Placeholder substituted for the protocol while free-standing URLs are being
# rewritten, so they are not matched again; it is swapped back further down.
531 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed in a URL.
532 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
534 # this is the list of separators that should be ignored if they
535 # are the last character of an URL but that should be included
536 # if they occur within the URL, e.g. "go to www.foo.com, where .."
537 # in this case, the last comma should not become part of the URL,
538 # but in "www.foo.com/123,2342,32.htm" it should.
# Characters allowed in the file-name part of an image URL.
540 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
541 $images = "gif|png|jpg|jpeg";
543 # PLEASE NOTE: The curly braces { } are not part of the regex,
544 # they are interpreted as part of the string (used to tell PHP
545 # that the content of the string should be inserted there).
# $e1: a URL not preceded by "[" whose last path segment ends in an image
# extension.
546 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
547 "((?i){$images})([^{$uc}]|$)/";
# $e2: any other URL not preceded by "["; a trailing separator is left out.
549 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
550 $sk =& $this->mOptions
->getSkin();
552 if ( $autonumber and $this->mOptions
->getAllowExternalImages() ) { # Use img tags only for HTTP urls
553 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
554 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
556 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
557 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
558 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
560 $s = str_replace( $unique, $protocol, $s );
# Bracketed links: split at every "[protocol:". A space is prepended so the
# first chunk is never empty, and removed again after the shift.
562 $a = explode( "[{$protocol}:", " " . $s );
563 $s = array_shift( $a );
564 $s = substr( $s, 1 );
# $e1: "url]rest" (no description); $e2: "url description]rest".
566 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
567 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
569 foreach ( $a as $line ) {
570 if ( preg_match( $e1, $line, $m ) ) {
571 $link = "{$protocol}:{$m[1]}";
573 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
574 else { $text = wfEscapeHTML( $link ); }
575 } else if ( preg_match( $e2, $line, $m ) ) {
576 $link = "{$protocol}:{$m[1]}";
# Neither form matched: the chunk is emitted unchanged, bracket included.
580 $s .= "[{$protocol}:" . $line;
# True when the link text is the URL itself (optionally without the protocol
# and a trailing slash).
583 if( $link == $text ||
preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
586 # Expand the URL for printable version
587 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
589 $la = $sk->getExternalLinkAttributes( $link, $text );
590 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Handles a ''' (bold) token. $state["strong"] / $state["em"] are false while
# the element is closed; otherwise they hold the opening token's position,
# or true when the token has no "pos".
# NOTE(review): the assignments for the plain close and open cases and the
# return statement are not visible here.
596 /* private */ function handle3Quotes( &$state, $token )
598 if ( $state["strong"] !== false ) {
# <em> was opened after <strong>: close both and reopen <em> so the tags stay
# properly nested.
599 if ( $state["em"] !== false && $state["em"] > $state["strong"] )
601 # ''' lala ''lala '''
602 $s = "</em></strong><em>";
606 $state["strong"] = FALSE;
609 $state["strong"] = isset($token["pos"]) ?
$token["pos"] : true;
# Handles a '' (italic) token. It mirrors handle3Quotes() with the roles of
# "em" and "strong" swapped.
# NOTE(review): the assignments for the plain close and open cases and the
# return statement are not visible here.
614 /* private */ function handle2Quotes( &$state, $token )
616 if ( $state["em"] !== false ) {
# <strong> was opened after <em>: close both and reopen <strong> so the tags
# stay properly nested.
617 if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
619 # ''lala'''lala'' ....'''
620 $s = "</strong></em><strong>";
624 $state["em"] = FALSE;
627 $state["em"] = isset($token["pos"]) ?
$token["pos"] : true;
# Handles a ''''' (bold + italic) token.
#
# $state - by reference; $state["em"] and $state["strong"] are false while
#          the element is closed, otherwise the position of the opening
#          token (or true when that token carried no "pos")
# $token - the current token; its "pos" entry is optional
#
# Returns the HTML to emit and updates $state accordingly.
/* private */ function handle5Quotes( &$state, $token )
{
	# Every branch that opens an element uses the same fallback as
	# handle2Quotes()/handle3Quotes(): a token without "pos" marks the
	# element as open with true.
	$pos = isset( $token["pos"] ) ? $token["pos"] : true;

	$s = "";
	if ( $state["em"] !== false && $state["strong"] !== false ) {
		# Both open: close the one opened last first.
		if ( $state["em"] < $state["strong"] ) {
			$s .= "</strong></em>";
		} else {
			$s .= "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] !== false ) {
		# Only <em> open: two quotes close it, the other three open <strong>.
		$s .= "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $pos;
	} elseif ( $state["strong"] !== false ) {
		# Only <strong> open: three quotes close it, the other two open <em>.
		$s .= "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $pos;
	} else { # not $em and not $strong
		$s .= "<strong><em>";
		$state["strong"] = $state["em"] = $pos;
	}
	return $s;
}
# Token-driven pass over the text: internal links, quote markup and the
# RFC/ISBN magic links are handled here, one token at a time.
# NOTE(review): the case labels of the switch and several other lines are
# not visible here; the comments describe only the visible statements.
658 /* private */ function doTokenizedParser( $str )
660 global $wgLang; # for language specific parser hook
662 $tokenizer=Tokenizer
::newFromString( $str );
# Tokens seen since the innermost unclosed link opener are kept on this stack.
663 $tokenStack = array();
# Quote state shared with handle2Quotes()/handle3Quotes()/handle5Quotes().
666 $state["em"] = FALSE;
667 $state["strong"] = FALSE;
671 # The tokenizer splits the text into tokens and returns them one by one.
672 # Every call to the tokenizer returns a new token.
673 while ( $token = $tokenizer->nextToken() )
675 switch ( $token["type"] )
678 # simple text with no further markup
679 $txt = $token["text"];
682 # Text that contains blanks that have to be converted to
683 # non-breakable spaces for French.
684 # U+202F NARROW NO-BREAK SPACE might be a better choice, but
685 # browser support for Unicode spacing is poor.
686 $txt = str_replace( " ", " ", $token["text"] );
689 # remember the tag opened with 3 [
693 # FIXME : Treat orphaned open tags (stack not empty when text is over)
695 array_push( $tokenStack, $token );
702 # get text from stack, glue it together, and call the code to handle a
705 if ( count( $tokenStack ) == 0 )
707 # stack empty. Found a ]] without an opening [[
# Pop back to the matching opener, gluing the stacked text together in its
# original order.
711 $lastToken = array_pop( $tokenStack );
712 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
714 if( !empty( $lastToken["text"] ) ) {
715 $linkText = $lastToken["text"] . $linkText;
717 $lastToken = array_pop( $tokenStack );
720 $txt = $linkText ."]]";
# Text carried by the opening token becomes the link prefix.
722 if( isset( $lastToken["text"] ) ) {
723 $prefix = $lastToken["text"];
# A text token directly after the closing brackets is appended to the link
# text before the link is handled.
727 $nextToken = $tokenizer->previewToken();
728 if ( $nextToken["type"] == "text" )
730 # Preview just looks at it. Now we have to fetch it.
731 $nextToken = $tokenizer->nextToken();
732 $txt .= $nextToken["text"];
734 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState
), $prefix );
736 # did the tag start with 3 [ ?
738 # show the first as text
744 $tagIsOpen = (count( $tokenStack ) != 0);
750 # This and the three next ones handle quotes
751 $txt = $this->handle3Quotes( $state, $token );
754 $txt = $this->handle2Quotes( $state, $token );
757 $txt = $this->handle5Quotes( $state, $token );
767 $txt = $this->doMagicRFC( $tokenizer );
774 $txt = $this->doMagicISBN( $tokenizer );
778 # Call language specific Hook.
779 $txt = $wgLang->processToken( $token, $tokenStack );
# NOTE(review): the comparison is loose, so any value equal to NULL under ==
# (e.g. an empty string) is treated as "unknown token".
780 if ( NULL == $txt ) {
781 # An unknown token. Highlight.
782 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
783 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
787 # If we're parsing the interior of a link, don't append the interior to $s,
788 # but push it to the stack so it can be processed when a ]] token is found.
789 if ( $tagIsOpen && $txt != "" ) {
790 $token["type"] = "text";
791 $token["text"] = $txt;
792 array_push( $tokenStack, $token );
797 if ( count( $tokenStack ) != 0 )
799 # still objects on stack. opened [[ tag without closing ]] tag.
# Unwind the stack: text tokens contribute their text, any other token
# contributes its type string (i.e. the literal opener).
801 while ( $lastToken = array_pop( $tokenStack ) )
803 if ( $lastToken["type"] == "text" )
805 $txt = $lastToken["text"] . $txt;
807 $txt = $lastToken["type"] . $txt;
# Renders one internal link.
#   $line   - link text as glued together by doTokenizedParser(), i.e.
#             "target|label]]trail" without the opening brackets
#   $prefix - text that directly preceded the opening brackets
# NOTE(review): many lines of this method are not visible here (among them
# the declarations of $tc/$e1, the assignments of $text/$trail and the
# return statements); the comments cover only the visible statements.
815 /* private */ function handleInternalLink( $line, $prefix )
817 global $wgLang, $wgLinkCache;
818 global $wgNamespacesWithSubpages, $wgLanguageCode;
819 static $fname = "Parser::handleInternalLink" ;
820 wfProfileIn( $fname );
822 wfProfileIn( "$fname-setup" );
# Lazily built once; "#" is allowed on top of the legal title characters.
824 if ( !$tc ) { $tc = Title
::legalChars() . "#"; }
825 $sk =& $this->mOptions
->getSkin();
827 # Match a link having the form [[namespace:link|alternate]]trail
829 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
830 # Match the end of a line for a word that's not followed by whitespace,
831 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
832 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
833 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
834 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
837 # Special and Media are pseudo-namespaces; no pages actually exist in them
# The namespace indexes are looked up on first use and cached in statics.
# NOTE(review): "!$x" is also true for an index of 0, so such a value would
# be looked up again on every call -- harmless but worth knowing.
838 static $image = FALSE;
839 static $special = FALSE;
840 static $media = FALSE;
841 static $category = FALSE;
842 if ( !$image ) { $image = Namespace::getImage(); }
843 if ( !$special ) { $special = Namespace::getSpecial(); }
844 if ( !$media ) { $media = Namespace::getMedia(); }
845 if ( !$category ) { $category = Namespace::getCategory(); ; }
847 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
849 wfProfileOut( "$fname-setup" );
852 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
855 } else { # Invalid form; output directly
856 $s .= $prefix . "[[" . $line ;
862 :Foobar -- override special treatment of prefix (images, language links)
863 /Foobar -- convert to CurrentPage/Foobar
864 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
866 $c = substr($m[1],0,1);
867 $noforce = ($c != ":");
868 if( $c == "/" ) { # subpage
869 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
870 $m[1]=substr($m[1],1,strlen($m[1])-2);
873 $noslash=substr($m[1],1);
875 if($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()]) { # subpages allowed here
876 $link = $this->mTitle
->getPrefixedText(). "/" . trim($noslash);
879 } # this might be changed for ugliness reasons
881 $link = $noslash; # no subpage allowed, use standard link
883 } elseif( $noforce ) { # no subpage
# Leading ":" case: the colon is dropped from the link target.
886 $link = substr( $m[1], 1 );
891 $nt = Title
::newFromText( $link );
893 $s .= $prefix . "[[" . $line;
896 $ns = $nt->getNamespace();
897 $iw = $nt->getInterWiki();
# Interlanguage link: recorded on the parser output instead of being rendered
# inline (only outside talk pages and when interwiki magic is enabled).
899 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
900 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
901 return (trim($s) == '')?
'': $s;
903 if( $ns == $image ) {
904 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
905 $wgLinkCache->addImageLinkObj( $nt );
# Category link: the rendered link is collected in mCategoryLinks and only
# prefix and trail are emitted inline.
908 if ( $ns == $category ) {
909 $t = $nt->getText() ;
910 $nnt = Title
::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
911 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
912 $this->mOutput
->mCategoryLinks
[] = $t ;
913 $s .= $prefix . $trail ;
# NOTE(review): strpos() returns 0 when "#" is the first character, and
# 0 == FALSE is true, so a link that starts with "#" is treated as having no
# fragment; a strict === comparison was presumably intended -- confirm.
917 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
918 ( strpos( $link, "#" ) == FALSE ) ) {
919 # Self-links are handled specially; generally de-link and change to bold.
920 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
924 if( $ns == $media ) {
925 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
926 $wgLinkCache->addImageLinkObj( $nt );
928 } elseif( $ns == $special ) {
929 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
932 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
934 wfProfileOut( $fname );
938 # Some functions here used by doBlockLevels()
# Closes the block section recorded in $this->mLastSection, if any, and
# resets the paragraph/pre tracking state. Returns the closing tag followed
# by a newline, or an empty string when no section was open.
/* private */ function closeParagraph()
{
	$openSection = $this->mLastSection;

	# The tracking state is reset whether or not anything was open.
	$this->mInPre = false;
	$this->mLastSection = "";

	if ( '' != $openSection ) {
		return "</" . $openSection . ">\n";
	}
	return "";
}
950 # getCommon() returns the length of the longest common substring
951 # of both arguments, starting at the beginning of both.
# Returns the length of the longest common prefix of $st1 and $st2, i.e. the
# number of leading characters (bytes) both strings share.
/* private */ function getCommon( $st1, $st2 )
{
	# Only positions that exist in both strings can match.
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used because the curly-brace form
	# ($str{$i}) was removed in PHP 8.0.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
964 # These next three functions open, continue, and close the list
965 # element appropriate to the prefix character passed into them.
# Opens the list element that belongs to the prefix character $char
# ("*", "#", ":" or ";") after closing any open paragraph. Returns the HTML
# to emit.
/* private */ function openList( $char )
{
	$html = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$html .= "<ul><li>";
			break;
		case "#":
			$html .= "<ol><li>";
			break;
		case ":":
			$html .= "<dl><dd>";
			break;
		case ";":
			$html .= "<dl><dt>";
			# Remember that a <dt> is open so it gets closed correctly later.
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the paragraph-closing output is replaced, not
			# extended, by the error marker.
			$html = "<!-- ERR 1 -->";
	}
	return $html;
}
# Returns the HTML that ends the current list item and starts the next one
# for the prefix character $char. For definition lists it also keeps
# $this->mDTopen in sync with whether a <dt> or a <dd> is now open.
983 /* private */ function nextItem( $char )
985 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
986 else if ( ":" == $char ||
";" == $char ) {
# NOTE(review): the default value of $close (used when no <dt> is open) is
# assigned on a line that is not visible here; presumably "</dd>" -- confirm.
988 if ( $this->mDTopen
) { $close = "</dt>"; }
989 if ( ";" == $char ) {
990 $this->mDTopen
= true;
991 return $close . "<dt>";
993 $this->mDTopen
= false;
994 return $close . "<dd>";
# Any other character is reported with an HTML comment marker.
997 return "<!-- ERR 2 -->";
# Builds the closing tags for the list opened by the prefix character $char.
# NOTE(review): the return statement for the normal case is not visible
# here; the visible code only shows how $text is chosen.
1000 /* private */function closeList( $char )
1002 if ( "*" == $char ) { $text = "</li></ul>"; }
1003 else if ( "#" == $char ) { $text = "</li></ol>"; }
1004 else if ( ":" == $char ) {
# A definition list is closed with </dt> or </dd> depending on which of the
# two is currently open.
1005 if ( $this->mDTopen
) {
1006 $this->mDTopen
= false;
1007 $text = "</dt></dl>";
1009 $text = "</dd></dl>";
# ";" is not handled here; doBlockLevels() maps ";" to ":" in the prefix it
# passes to this method.
1012 else { return "<!-- ERR 3 -->"; }
# Line-by-line block-level pass: builds lists from "*", "#", ":" and ";"
# prefixes and wraps other text in <p>/<pre> sections.
#   $text      - text to process, split on "\n"
#   $linestart - when false, the first line is copied through unprocessed
# NOTE(review): many control lines of this method are not visible here; the
# comments describe only the visible statements. The string offsets written
# as $str{$i} use a syntax that was removed in PHP 8.0.
1016 /* private */ function doBlockLevels( $text, $linestart )
1018 $fname = "Parser::doBlockLevels";
1019 wfProfileIn( $fname );
1020 # Parsing through the text line by line. The main thing
1021 # happening here is handling of block-level elements p, pre,
1022 # and making lists from lines starting with * # : etc.
1024 $a = explode( "\n", $text );
# $text is reused as the output buffer from here on.
1026 $lastPref = $text = $lastLine = '';
1027 $this->mDTopen
= $inBlockElem = false;
1031 if ( ! $linestart ) { $text .= array_shift( $a ); }
1032 foreach ( $a as $t ) {
# $opl: length of the previous line's list prefix.
1034 $opl = strlen( $lastPref );
1035 $preCloseMatch = preg_match("/<\\/pre/i", $t );
1036 $preOpenMatch = preg_match("/<pre/i", $t );
1037 if (!$this->mInPre
) {
1038 $this->mInPre
= !empty($preOpenMatch);
1040 if ( !$this->mInPre
) {
# $npl: length of this line's list prefix. $pref2 is the prefix with ";"
# mapped to ":" so that both kinds of definition-list line compare equal.
1041 $npl = strspn( $t, "*#:;" );
1042 $pref = substr( $t, 0, $npl );
1043 $pref2 = str_replace( ";", ":", $pref );
1044 $t = substr( $t, $npl );
1047 $pref = $pref2 = '';
# Same prefix as the previous line: continue the current list.
1051 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1052 $text .= $this->nextItem( substr( $pref, -1 ) );
1053 if ( $pstack ) { $pstack = false; }
# "; term : definition" on a single line: split at the first colon.
1055 if ( ";" == substr( $pref, -1 ) ) {
1056 $cpos = strpos( $t, ":" );
1057 if ( false !== $cpos ) {
1058 $term = substr( $t, 0, $cpos );
1059 $text .= $term . $this->nextItem( ":" );
1060 $t = substr( $t, $cpos +
1 );
# The prefix changed: close the levels that ended, then open the new ones.
1063 } else if (0 != $npl ||
0 != $opl) {
1064 $cpl = $this->getCommon( $pref, $lastPref );
1065 if ( $pstack ) { $pstack = false; }
1067 while ( $cpl < $opl ) {
1068 $text .= $this->closeList( $lastPref{$opl-1} );
1071 if ( $npl <= $cpl && $cpl > 0 ) {
1072 $text .= $this->nextItem( $pref{$cpl-1} );
1074 while ( $npl > $cpl ) {
1075 $char = substr( $pref, $cpl, 1 );
1076 $text .= $this->openList( $char );
1078 if ( ";" == $char ) {
1079 $cpos = strpos( $t, ":" );
1080 if ( ! ( false === $cpos ) ) {
1081 $term = substr( $t, 0, $cpos );
1082 $text .= $term . $this->nextItem( ":" );
1083 $t = substr( $t, $cpos +
1 );
1090 if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1091 $uniq_prefix = UNIQ_PREFIX
;
1092 // XXX: use a stack for nestable elements like span, table and div
# A line that opens or closes a block-level element ends the current
# paragraph; a stripped <pre> marker counts as a closing match.
1093 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1094 $closematch = preg_match(
1095 "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1096 "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1097 if ( $openmatch or $closematch ) {
1098 if ( $pstack ) { $pstack = false; }
1099 $text .= $this->closeParagraph();
1100 if($preOpenMatch and !$preCloseMatch) {
1101 $this->mInPre
= true;
1103 if ( $closematch ) {
1104 $inBlockElem = false;
1106 $inBlockElem = true;
1108 } else if ( !$inBlockElem ) {
# A leading space starts or continues a preformatted section.
# NOTE(review): $t{0} is read without checking that $t is non-empty.
1109 if ( " " == $t{0} ) {
1111 if ($this->mLastSection
!= 'pre') {
1113 $text .= $this->closeParagraph().'<pre>';
1114 $this->mLastSection
= 'pre';
# Blank line handling: $pstack holds a pending paragraph break.
1118 if ( '' == trim($t) ) {
1120 $text .= $pstack.'<br/>';
1122 $this->mLastSection
= 'p';
1124 if ($this->mLastSection
!= 'p' ) {
1125 $text .= $this->closeParagraph();
1126 $this->mLastSection
= '';
1129 $pstack = '</p><p>';
1136 $this->mLastSection
= 'p';
1137 } else if ($this->mLastSection
!= 'p') {
1138 $text .= $this->closeParagraph().'<p>';
1139 $this->mLastSection
= 'p';
1145 if ($pstack === false) {
# End of input: close the remaining list levels and any open section.
1150 $text .= $this->closeList( $pref2{$npl-1} );
1153 if ( "" != $this->mLastSection
) {
1154 $text .= "</" . $this->mLastSection
. ">";
1155 $this->mLastSection
= "";
1158 wfProfileOut( $fname );
# Returns the current value of the magic variable identified by $index (one
# of the MAG_* constants).
# NOTE(review): several case labels and return statements are not visible
# here; two of the returns below (page title text, canonical namespace name)
# follow labels that are missing from this view.
1162 function getVariableValue( $index ) {
1163 global $wgLang, $wgSitename, $wgServer;
1166 case MAG_CURRENTMONTH
:
1168 case MAG_CURRENTMONTHNAME
:
1169 return $wgLang->getMonthName( date("n") );
1170 case MAG_CURRENTMONTHNAMEGEN
:
1171 return $wgLang->getMonthNameGen( date("n") );
1172 case MAG_CURRENTDAY
:
1175 return $this->mTitle
->getText();
1177 return Namespace::getCanonicalName($this->mTitle
->getNamespace());
1178 case MAG_CURRENTDAYNAME
:
# date("w") is 0-based (0 = Sunday); the +1 shifts it to a 1-based index.
1179 return $wgLang->getWeekdayName( date("w")+
1 );
1180 case MAG_CURRENTYEAR
:
1182 case MAG_CURRENTTIME
:
1183 return $wgLang->time( wfTimestampNow(), false );
1184 case MAG_NUMBEROFARTICLES
:
1185 return wfNumberOfArticles();
# Fills $this->mVariables with the current value of every magic variable
# listed in $wgVariableIDs. replaceVariables() calls this lazily while
# mVariables is still unset.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		# addToArray() receives the variables array and the value to store
		# for this magic word.
		$magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
	}
}
# Expands {{...}} constructs in $text by handing every match to the
# wfBraceSubstitution callback. $args are the arguments of the enclosing
# inclusion, if any.
# NOTE(review): the return statement is not visible here.
1205 /* private */ function replaceVariables( $text, $args = array() )
1207 global $wgLang, $wgScript, $wgArticlePath;
1209 $fname = "Parser::replaceVariables";
1210 wfProfileIn( $fname );
# The variable table is built on first use.
1213 if ( !$this->mVariables
) {
1214 $this->initialiseVariables();
# Capture groups: 1 = optional newline before the braces, 2 = the part before
# the first "|" (title characters only, non-greedy), 3 = "|" plus the
# arguments, or empty.
1216 $titleChars = Title
::legalChars();
1217 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1219 # This function is called recursively. To keep track of arguments we need a stack:
1220 array_push( $this->mArgStack
, $args );
1222 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
# The parser is published through a global, presumably so that the callback
# can reach it -- confirm in wfBraceSubstitution.
1223 $GLOBALS['wgCurParser'] =& $this;
1224 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1226 array_pop( $this->mArgStack
);
# Expands a single {{...}} match; invoked through wfBraceSubstitution().
#   $matches - regex match from replaceVariables(): [0] whole match, [1] optional
#              newline, [2] text before the first "|", [3] "|"-prefixed argument
#              tail or ""
# The visible branches try, in order: SUBST handling, the MSGNW / MSG / INT
# prefixes, NS:, LOCALURL / LOCALURLE, built-in variables, arguments supplied by
# the caller, and finally loading the text of a template page.
# Returns $newline . $text.
# NOTE(review): the assignments to $found and $nowiki, the else branches and most
# closing braces are not visible in this listing.
1231 function braceSubstitution( $matches )
1233 global $wgLinkCache, $wgLang;
1234 $fname = "Parser::braceSubstitution";
1239 # $newline is an optional newline character before the braces
1240 # $part1 is the bit before the first |, and must contain only title characters
1241 # $args is a list of arguments, starting from index 0, not including $part1
1243 $newline = $matches[1];
1244 $part1 = $matches[2];
1245 # If the third subpattern matched anything, it will start with |
1246 if ( $matches[3] !== "" ) {
1247 $args = explode( "|", substr( $matches[3], 1 ) );
1251 $argc = count( $args );
1254 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1255 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1256 if ( $this->mOutputType
!= OT_WIKI
) {
1257 # Invalid SUBST not replaced at PST time
1258 # Return without further processing
1259 $text = $matches[0];
1262 } elseif ( $this->mOutputType
== OT_WIKI
) {
1263 # SUBST not found in PST pass, do nothing
1264 $text = $matches[0];
1268 # MSG, MSGNW and INT
1271 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1272 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1275 # Remove obsolete MSG:
1276 $mwMsg =& MagicWord
::get( MAG_MSG
);
1277 $mwMsg->matchStartAndRemove( $part1 );
1280 # Check if it is an internal message
1281 $mwInt =& MagicWord
::get( MAG_INT
);
1282 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
# Internal messages are rate-limited through the same include counter as pages
1283 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1284 $text = wfMsgReal( $part1, $args, true );
1292 # Check for NS: (namespace expansion)
# NOTE(review): plain "=" here and for the LOCALURL words below, while the other
# MagicWord::get() calls in this function bind with "=&" -- confirm whether the
# difference is intentional.
1293 $mwNs = MagicWord
::get( MAG_NS
);
1294 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero integer is used directly as a namespace index; anything else is
# looked up as a canonical namespace name
1295 if ( intval( $part1 ) ) {
1296 $text = $wgLang->getNsText( intval( $part1 ) );
1299 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1300 if ( !is_null( $index ) ) {
1301 $text = $wgLang->getNsText( $index );
1308 # LOCALURL and LOCALURLE
1310 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1311 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
# $func ends up holding the name of the Title method to call dynamically
1313 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1314 $func = 'getLocalURL';
1315 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1316 $func = 'escapeLocalURL';
1321 if ( $func !== '' ) {
1322 $title = Title
::newFromText( $part1 );
1323 if ( !is_null( $title ) ) {
1325 $text = $title->$func( $args[0] );
1327 $text = $title->$func();
1334 # Internal variables
1335 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1336 $text = $this->mVariables
[$part1];
1338 $this->mOutput
->mContainsOldMagic
= true;
1341 # Arguments input from the caller
# Top of the stack pushed by replaceVariables()
1342 $inputArgs = end( $this->mArgStack
);
1343 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1344 $text = $inputArgs[$part1];
1348 # Load from database
1350 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1351 if ( !is_null( $title ) && !$title->isExternal() ) {
1352 # Check for excessive inclusion
1353 $dbk = $title->getPrefixedDBkey();
1354 if ( $this->incrementIncludeCount( $dbk ) ) {
1355 $article = new Article( $title );
1356 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1357 if ( $articleContent !== false ) {
1359 $text = $articleContent;
1364 # If the title is valid but undisplayable, make a link to it
1365 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1366 $text = "[[" . $title->getPrefixedText() . "]]";
1372 # Recursive parsing, escaping and link table handling
1373 # Only for HTML output
1374 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1375 $text = wfEscapeWikiText( $text );
1376 } elseif ( $this->mOutputType
== OT_HTML
&& $found ) {
1377 # Clean up argument array
# Arguments without "=" get consecutive numeric keys; "name=value" arguments are
# keyed by the trimmed name
1378 $assocArgs = array();
1380 foreach( $args as $arg ) {
1381 $eqpos = strpos( $arg, "=" );
1382 if ( $eqpos === false ) {
1383 $assocArgs[$index++
] = $arg;
1385 $name = trim( substr( $arg, 0, $eqpos ) );
1386 $value = trim( substr( $arg, $eqpos+
1 ) );
# NOTE(review): trim() returns a string, so the strict comparisons of $value and
# $name with false below look like they can never behave as a "substr failed"
# check -- confirm the intent.
1387 if ( $value === false ) {
1390 if ( $name !== false ) {
1391 $assocArgs[$name] = $value;
1396 # Do not enter included links in link table
1397 if ( !is_null( $title ) ) {
1398 $wgLinkCache->suspend();
1401 # Run full parser on the included text
1402 $text = $this->strip( $text, $this->mStripState
);
1403 $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1405 # Add the result to the strip state for re-inclusion after
1406 # the rest of the processing
1407 $text = $this->insertStripItem( $text, $this->mStripState
);
1409 # Resume the link cache and register the inclusion as a link
1410 if ( !is_null( $title ) ) {
1411 $wgLinkCache->resume();
1412 $wgLinkCache->addLinkObj( $title );
1419 return $newline . $text;
1423 # Returns true if the function is allowed to include this entity
# Bumps the per-key counter in $this->mIncludeCount (created at 0 on first use)
# and compares the incremented value with MAX_INCLUDE_REPEAT. This is the
# countermeasure against the O(N^2) inclusion attack described at the top of
# the file.
#   $dbk - key identifying the included entity (callers pass a prefixed DB key
#          or an "int:..." message key)
# NOTE(review): the return statements are not visible in this listing.
1424 function incrementIncludeCount( $dbk )
1426 if ( !array_key_exists( $dbk, $this->mIncludeCount
) ) {
1427 $this->mIncludeCount
[$dbk] = 0;
# Pre-increment: the count is updated even when the limit has been exceeded
1429 if ( ++
$this->mIncludeCount
[$dbk] <= MAX_INCLUDE_REPEAT
) {
1437 # Cleans up HTML, removes dangerous tags and attributes
# Splits $text on "<", inspects each chunk as a potential tag and re-emits it
# only when the tag name is in the whitelist built below; attributes are passed
# through fixTagAttributes(). Open tags are tracked on $tagstack (with a
# separate $tablestack saved per <table>) so that remaining tags can be closed
# at the end.
# Fix: stray ">" characters and chunks that are not accepted tags are now
# emitted as the entities "&gt;" / "&lt;". The previous code replaced ">" with
# ">" (a no-op) and re-emitted a raw "<", which let unrecognised markup through
# unescaped.
# NOTE(review): several statements and closing braces are not visible in this
# listing; only the two escaping statements were changed.
1438 /* private */ function removeHTMLtags( $text )
1440 $fname = "Parser::removeHTMLtags";
1441 wfProfileIn( $fname );
1442 $htmlpairs = array( # Tags that must be closed
1443 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1444 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1445 "strike", "strong", "tt", "var", "div", "center",
1446 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1447 "ruby", "rt" , "rb" , "rp", "p"
1449 $htmlsingle = array(
1450 "br", "hr", "li", "dt", "dd"
1452 $htmlnest = array( # Tags that can be nested--??
1453 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1454 "dl", "font", "big", "small", "sub", "sup"
1456 $tabletags = array( # Can only appear inside table
# Table tags are treated like single tags; the full whitelist is singles + pairs
1460 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1461 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1463 $htmlattrs = $this->getHTMLattrs () ;
1465 # Remove HTML comments
1466 $text = preg_replace( "/<!--.*-->/sU", "", $text );
# Everything before the first "<" is plain text and is kept as is
1468 $bits = explode( "<", $text );
1469 $text = array_shift( $bits );
1470 $tagstack = array(); $tablestack = array();
1472 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match/list() pair
# (presumably because a chunk that does not match leaves $regs incomplete)
1473 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1474 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1476 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1477 error_reporting( $prev );
1480 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1484 if ( ! in_array( $t, $htmlsingle ) &&
1485 ( $ot = array_pop( $tagstack ) ) != $t ) {
1486 array_push( $tagstack, $ot );
1489 if ( $t == "table" ) {
1490 $tagstack = array_pop( $tablestack );
1495 # Keep track for later
1496 if ( in_array( $t, $tabletags ) &&
1497 ! in_array( "table", $tagstack ) ) {
1499 } else if ( in_array( $t, $tagstack ) &&
1500 ! in_array ( $t , $htmlnest ) ) {
1502 } else if ( ! in_array( $t, $htmlsingle ) ) {
1503 if ( $t == "table" ) {
1504 array_push( $tablestack, $tagstack );
1505 $tagstack = array();
1507 array_push( $tagstack, $t );
1509 # Strip non-approved attributes from the tag
1510 $newparams = $this->fixTagAttributes($params);
# Escape any ">" in the text that follows the tag
1514 $rest = str_replace( ">", "&gt;", $rest );
1515 $text .= "<$slash$t $newparams$brace$rest";
# Not an accepted tag: emit the chunk as escaped text instead of markup
1519 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1521 # Close off any remaining tags
1522 while ( $t = array_pop( $tagstack ) ) {
1524 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1526 wfProfileOut( $fname );
1532 * This function accomplishes several tasks:
1533 * 1) Auto-number headings if that option is enabled
1534 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1535 * 3) Add a Table of contents on the top for users who have enabled the option
1536 * 4) Auto-anchor headings
1538 * It loops through all headlines, collects the necessary data, then splits up the
1539 * string and re-inserts the newly formatted headlines.
# Post-processes rendered HTML headings in $text.
#   $text - HTML containing <h1>..<h6> elements
# Behaviour is driven by $this->mOptions (number headings, show TOC, edit
# section links, right-click editing), by $this->mTitle->userCanEdit(), and by
# the NOEDITSECTION / NOTOC / FORCETOC magic words found in the text.
# NOTE(review): the assignments inside most of the conditionals, the final
# assembly of the output and the return statement are not visible in this
# listing.
1543 /* private */ function formatHeadings( $text )
1545 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
1546 $doShowToc = $this->mOptions
->getShowToc();
1547 if( !$this->mTitle
->userCanEdit() ) {
1549 $rightClickHack = 0;
1551 $showEditLink = $this->mOptions
->getEditSection();
1552 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
1555 # Inhibit editsection links if requested in the page
1556 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1557 if( $esw->matchAndRemove( $text ) ) {
1560 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1562 $mw =& MagicWord
::get( MAG_NOTOC
);
1563 if( $mw->matchAndRemove( $text ) ) {
1567 # never add the TOC to the Main Page. This is an entry page that should not
1568 # be more than 1-2 screens large anyway
1569 if( $this->mTitle
->getPrefixedText() == wfMsg("mainpage") ) {
1573 # Get all headlines for numbering them and adding funky stuff like [edit]
1574 # links - this is for later, but we need the number of headlines right now
# $matches[1] = heading level digit, [2] = rest of the opening tag up to and
# including ">", [3] = heading contents
1575 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1577 # if there are fewer than 4 headlines in the article, do not show TOC
1578 if( $numMatches < 4 ) {
1582 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1583 # override above conditions and always show TOC
1584 $mw =& MagicWord
::get( MAG_FORCETOC
);
1585 if ($mw->matchAndRemove( $text ) ) {
1590 # We need this to perform operations on the HTML
1591 $sk =& $this->mOptions
->getSkin();
1596 # Ugh .. the TOC should have neat indentation levels which can be
1597 # passed to the skin functions. These are determined here
1602 $sublevelCount = array();
1605 foreach( $matches[3] as $headline ) {
1608 $prevlevel = $level;
1610 $level = $matches[1][$headlineCount];
1611 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
1612 # reset when we enter a new level
1613 $sublevelCount[$level] = 0;
1614 $toc .= $sk->tocIndent( $level - $prevlevel );
1615 $toclevel +
= $level - $prevlevel;
1617 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
1618 # reset when we step back a level
1619 $sublevelCount[$level+
1]=0;
1620 $toc .= $sk->tocUnindent( $prevlevel - $level );
1621 $toclevel -= $prevlevel - $level;
1623 # count number of headlines for each level
# "@" silences the notice raised when this level has no counter entry yet
1624 @$sublevelCount[$level]++
;
1625 if( $doNumberHeadings ||
$doShowToc ) {
# Builds the section number from the non-empty counters of levels 1..$level
1627 for( $i = 1; $i <= $level; $i++
) {
1628 if( !empty( $sublevelCount[$i] ) ) {
1632 $numbering .= $sublevelCount[$i];
1638 # The canonized header is a version of the header text safe to use for links
1639 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1640 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
# Drop any remaining tags, then collapse runs of link-unsafe characters to "_"
1643 $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1644 $tocline = trim( $canonized_headline );
1645 $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1646 $refer[$headlineCount] = $canonized_headline;
1648 # count how many in assoc. array so we can track dupes in anchors
1649 @$refers[$canonized_headline]++
;
1650 $refcount[$headlineCount]=$refers[$canonized_headline];
1652 # Prepend the number to the heading text
1654 if( $doNumberHeadings ||
$doShowToc ) {
1655 $tocline = $numbering . " " . $tocline;
1657 # Don't number the heading if it is the only one (looks silly)
1658 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1659 # the two are different if the line contains a link
1660 $headline=$numbering . " " . $headline;
1664 # Create the anchor for linking from the TOC to the section
# Repeated headings get a "_<n>" suffix so that every anchor is unique
1665 $anchor = $canonized_headline;
1666 if($refcount[$headlineCount] > 1 ) {
1667 $anchor .= "_" . $refcount[$headlineCount];
1670 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1672 if( $showEditLink ) {
1673 if ( empty( $head[$headlineCount] ) ) {
1674 $head[$headlineCount] = "";
# Section numbers handed to the skin are 1-based
1676 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
1679 # Add the edit section span
1680 if( $rightClickHack ) {
1681 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
1684 # give headline the correct <h#> tag
1685 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1691 $toclines = $headlineCount;
1692 $toc .= $sk->tocUnindent( $toclevel );
1693 $toc = $sk->tocTable( $toc );
1696 # split up and insert constructed headlines
# Same heading pattern as above, used here to cut $text into the blocks that
# lie between headings
1698 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1701 foreach( $blocks as $block ) {
1702 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1703 # This is the [edit] link that appears for the top block of text when
1704 # section editing is enabled
1706 # Disabled because it broke block formatting
1707 # For example, a bullet point in the top line
1708 # $full .= $sk->editSectionLink(0);
1711 if( $doShowToc && !$i) {
1712 # Top anchor now in skin
1716 if( !empty( $head[$i] ) ) {
# Handles the text that follows an ISBN keyword in the token stream.
#   $tokenizer - tokenizer positioned after the keyword; tokens whose type is ""
#                are skipped, then a "text" token (if present) is consumed
# When an ISBN is recognised the result is an internal link to the
# Special:Booksources page with an "isbn=<digits>" query; otherwise the visible
# fallback re-emits "ISBN " followed by the original text.
# NOTE(review): $x{0} is the curly-brace string offset syntax, which current PHP
# versions no longer accept. The loop bodies that build $blank / $isbn, the
# branch conditions and the return statement are not visible in this listing.
1725 /* private */ function doMagicISBN( &$tokenizer )
1729 # Check whether next token is a text token
1730 # If yes, fetch it and convert the text into a
1731 # Special::BookSources link
1732 $token = $tokenizer->previewToken();
1733 while ( $token["type"] == "" )
1735 $tokenizer->nextToken();
1736 $token = $tokenizer->previewToken();
1738 if ( $token["type"] == "text" )
1740 $token = $tokenizer->nextToken();
1741 $x = $token["text"];
# Characters accepted as part of the ISBN
1742 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1744 $isbn = $blank = "" ;
# Consume leading blanks one character at a time
1745 while ( " " == $x{0} ) {
1747 $x = substr( $x, 1 );
# Consume leading characters that belong to $valid
1749 while ( strstr( $valid, $x{0} ) != false ) {
1751 $x = substr( $x, 1 );
# The query value is the ISBN with dashes and blanks removed
1753 $num = str_replace( "-", "", $isbn );
1754 $num = str_replace( " ", "", $num );
1757 $text = "ISBN $blank$x";
1759 $titleObj = Title
::makeTitle( NS_SPECIAL
, "Booksources" );
1760 $text = "<a href=\"" .
1761 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1762 "\" class=\"internal\">ISBN $isbn</a>";
# Handles the text that follows an RFC keyword in the token stream.
#   $tokenizer - tokenizer positioned after the keyword; tokens whose type is ""
#                are skipped, then a "text" token (if present) is consumed
# When a number is recognised the result is an external link built from the
# "rfcurl" message (its "$1" placeholder is replaced by the number) followed by
# the remaining text; otherwise "RFC " plus the original text is re-emitted.
# Fix: the fallback now assigns $text instead of appending to it. No earlier
# assignment of $text is visible in this function, and the parallel line in
# doMagicISBN() uses plain assignment.
# NOTE(review): $x{0} is the curly-brace string offset syntax, which current PHP
# versions no longer accept. The loop bodies that build $blank / $rfc, the
# branch conditions and the return statement are not visible in this listing.
1770 /* private */ function doMagicRFC( &$tokenizer )
1774 # Check whether next token is a text token
1775 # If yes, fetch it and convert the text into a
1776 # link to an RFC source
1777 $token = $tokenizer->previewToken();
1778 while ( $token["type"] == "" )
1780 $tokenizer->nextToken();
1781 $token = $tokenizer->previewToken();
1783 if ( $token["type"] == "text" )
1785 $token = $tokenizer->nextToken();
1786 $x = $token["text"];
# Only digits are accepted as part of the RFC number
1787 $valid = "0123456789";
1789 $rfc = $blank = "" ;
# Consume leading blanks one character at a time
1790 while ( " " == $x{0} ) {
1792 $x = substr( $x, 1 );
# Consume leading digits
1794 while ( strstr( $valid, $x{0} ) != false ) {
1796 $x = substr( $x, 1 );
1800 $text = "RFC $blank$x";
1802 $url = wfmsg( "rfcurl" );
1803 $url = str_replace( "$1", $rfc, $url);
1804 $sk =& $this->mOptions
->getSkin();
1805 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1806 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
# Entry point for the pre-save pass: transforms wiki markup before it is stored.
#   $text       - wiki text submitted by the user
#   $title      - Title of the page; stored by reference in $this->mTitle
#   $user       - user performing the save; passed on to pstPass2()
#   $options    - ParserOptions stored in $this->mOptions
#   $clearState - when true, clearState() is called first
# Sets the output type to OT_WIKI, normalises some markup with two replacement
# tables, strips sections into a local strip state, runs pstPass2() and then
# restores the stripped sections.
# NOTE(review): the openings of both $pairs arrays and the return statement are
# not visible in this listing.
1814 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1816 $this->mOptions
= $options;
1817 $this->mTitle
=& $title;
1818 $this->mOutputType
= OT_WIKI
;
1820 if ( $clearState ) {
1821 $this->clearState();
# Local strip state: separate from $this->mStripState
1824 $stripState = false;
# First table: plain string replacements
1828 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
# Second table: regex replacements normalising <br> variants
1831 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1832 "/<br *?>/i" => "<br/>",
1834 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1835 $text = $this->strip( $text, $stripState, false );
1836 $text = $this->pstPass2( $text, $user );
1837 $text = $this->unstrip( $text, $stripState );
# Second stage of the pre-save transform.
#   $text - wiki text with stripped sections already removed
#   $user - user whose name and "nickname" option are used for signatures
# Steps visible here: variable replacement (subst: only, because the output type
# is OT_WIKI), expansion of ~~~ / ~~~~ / ~~~~~ signatures, expansion of the
# "pipe trick" link forms, a SUBST callback pass on a forked parser, and
# trimming of trailing whitespace with support for the END magic word.
# Fix: signatures are now expanded with str_replace() on the literal tilde runs.
# The previous preg_replace() calls used the user name, nickname and date as
# the replacement string, where "$1" / "\1"-style sequences are interpreted as
# backreferences and silently removed.
# NOTE(review): the assignment of $context and the return statement are not
# visible in this listing.
1841 /* private */ function pstPass2( $text, &$user )
1843 global $wgLang, $wgLocaltimezone, $wgCurParser;
1845 # Variable replacement
1846 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1847 $text = $this->replaceVariables( $text );
# $n = user name, $k = nickname (falls back to the user name when empty)
1851 $n = $user->getName();
1852 $k = $user->getOption( "nickname" );
1853 if ( "" == $k ) { $k = $n; }
# Temporarily switch the TZ environment variable so date() uses the wiki's zone
1854 if(isset($wgLocaltimezone)) {
1855 $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1857 /* Note: this is an ugly timezone hack for the European wikis */
1858 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1859 " (" . date( "T" ) . ")";
1860 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
# Longest run first, so that ~~~~~ is not consumed by the shorter forms
1862 $text = str_replace( "~~~~~", $d, $text );
1863 $text = str_replace( "~~~~", "[[" . $wgLang->getNsText(
1864 Namespace::getUser() ) . ":$n|$k]] $d", $text );
1865 $text = str_replace( "~~~", "[[" . $wgLang->getNsText(
1866 Namespace::getUser() ) . ":$n|$k]]", $text );
1868 # Context links: [[|name]] and [[name (context)|]]
1870 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1871 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1872 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1873 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1875 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
1876 $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
1877 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
1878 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1879 # [[ns:page (cont)|]]
# The current page title is matched against "name (context)"
1881 $t = $this->mTitle
->getText();
1882 if ( preg_match( $conpat, $t, $m ) ) {
# Most specific pattern first
1885 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1886 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1887 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1889 if ( "" == $context ) {
1890 $text = preg_replace( $p2, "[[\\1]]", $text );
1892 $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1896 $mw =& MagicWord::get( MAG_SUBST );
1897 $wgCurParser = $this->fork();
1898 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1899 $this->merge( $wgCurParser );
1902 # Trim trailing whitespace
1903 # MAG_END (__END__) tag allows for trailing
1904 # whitespace to be deliberately included
1905 $text = rtrim( $text );
1906 $mw =& MagicWord
::get( MAG_END
);
1907 $mw->matchAndRemove( $text );
1912 # Set up some variables which are usually set up in parse()
1913 # so that an external function can call some class members with confidence
#   $title      - Title stored by reference in $this->mTitle
#   $options    - ParserOptions stored in $this->mOptions
#   $outputType - one of the OT_* constants, stored in $this->mOutputType
#   $clearState - when true, clearState() resets the per-parse state
1914 function startExternalParse( &$title, $options, $outputType, $clearState = true )
1916 $this->mTitle
=& $title;
1917 $this->mOptions
= $options;
1918 $this->mOutputType
= $outputType;
1919 if ( $clearState ) {
1920 $this->clearState();
# Expands variables in a message text using the OT_MSG output type.
#   $text    - message text
#   $options - ParserOptions stored in $this->mOptions
# A function-static $executing flag serves as a recursion guard.
# NOTE(review): the global declaration for $wgTitle, the body of the recursion
# guard and the return statement are not visible in this listing.
1924 function transformMsg( $text, $options ) {
1926 static $executing = false;
1928 # Guard against infinite recursion
1934 $this->mTitle
= $wgTitle;
1935 $this->mOptions
= $options;
1936 $this->mOutputType
= OT_MSG
;
# State is always reset here; there is no $clearState parameter
1937 $this->clearState();
1938 $text = $this->replaceVariables( $text );
# Fields: rendered text, language links, category links and the flag that
# records whether old-style magic variables were used.
1947 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor (PHP4 style: named after the class). Every argument is optional
# and is copied into the field of the same name.
1949 function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
1950 $containsOldMagic = false )
1952 $this->mText
= $text;
1953 $this->mLanguageLinks
= $languageLinks;
1954 $this->mCategoryLinks
= $categoryLinks;
1955 $this->mContainsOldMagic
= $containsOldMagic;
# Accessors: the get*/contains* functions return the field unchanged.
1958 function getText() { return $this->mText
; }
1959 function getLanguageLinks() { return $this->mLanguageLinks
; }
1960 function getCategoryLinks() { return $this->mCategoryLinks
; }
1961 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Setters assign through wfSetVar() and return whatever wfSetVar() returns
# (presumably the previous value -- confirm against its definition).
1962 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
1963 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
1964 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
1965 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
# Folds the link data of another ParserOutput into this one.
#   $other - ParserOutput whose language links and category links are appended
#            to this object's lists; the old-magic flag becomes the logical OR
#            of both objects' flags
# $this->mText is not touched.
# Fix: category links are now taken from $other->mCategoryLinks. The previous
# code appended this object's own language links to the category list and
# dropped the other object's categories.
1967 function merge( $other ) {
1968 $this->mLanguageLinks
= array_merge( $this->mLanguageLinks
, $other->mLanguageLinks
);
1969 $this->mCategoryLinks
= array_merge( $this->mCategoryLinks
, $other->mCategoryLinks
);
1970 $this->mContainsOldMagic
= $this->mContainsOldMagic ||
$other->mContainsOldMagic
;
1977 # All variables are private
# Each field has a matching get*/set* accessor below; initialiseFromUser() fills
# the first five from the $wg* globals and the rest from the user object.
1978 var $mUseTeX; # Use texvc to expand <math> tags
1979 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
1980 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
1981 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
1982 var $mAllowExternalImages; # Allow external images inline
1983 var $mSkin; # Reference to the preferred skin
1984 var $mDateFormat; # Date format index
1985 var $mEditSection; # Create "edit section" links
1986 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
1987 var $mNumberHeadings; # Automatically number headings
1988 var $mShowToc; # Show table of contents
# Getters: each returns the field of the same name unchanged.
1990 function getUseTeX() { return $this->mUseTeX
; }
1991 function getUseCategoryMagic() { return $this->mUseCategoryMagic
; }
1992 function getUseDynamicDates() { return $this->mUseDynamicDates
; }
1993 function getInterwikiMagic() { return $this->mInterwikiMagic
; }
1994 function getAllowExternalImages() { return $this->mAllowExternalImages
; }
1995 function getSkin() { return $this->mSkin
; }
1996 function getDateFormat() { return $this->mDateFormat
; }
1997 function getEditSection() { return $this->mEditSection
; }
1998 function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick
; }
1999 function getNumberHeadings() { return $this->mNumberHeadings
; }
2000 function getShowToc() { return $this->mShowToc
; }
# Setters: each assigns $x to the field of the same name through wfSetVar() and
# returns that helper's result (presumably the previous value -- confirm against
# its definition). setSkin() is the one exception and uses wfSetRef().
2002 function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX
, $x ); }
2003 function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic
, $x ); }
2004 function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates
, $x ); }
2005 function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic
, $x ); }
2006 function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages
, $x ); }
# The skin is an object held by reference, hence wfSetRef() instead of wfSetVar()
2007 function setSkin( $x ) { return wfSetRef( $this->mSkin
, $x ); }
2008 function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat
, $x ); }
2009 function setEditSection( $x ) { return wfSetVar( $this->mEditSection
, $x ); }
2010 function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick
, $x ); }
2011 function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings
, $x ); }
2012 function setShowToc( $x ) { return wfSetVar( $this->mShowToc
, $x ); }
# Factory: creates a ParserOptions object and initialises it from $user via
# initialiseFromUser().
# NOTE(review): the return statement is not visible in this listing.
2014 /* static */ function newFromUser( &$user )
2016 $popts = new ParserOptions
;
2017 $popts->initialiseFromUser( $user );
# Fills every option field of this object.
#   $userInput - user object to read preferences from; when it is falsy a
#                substitute $user is prepared instead (its creation is not
#                visible in this listing, only the setLoaded( true ) call)
# Site-wide switches are copied from the $wg* globals; the skin and the
# remaining options come from the user object.
2021 function initialiseFromUser( &$userInput )
2023 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2025 if ( !$userInput ) {
2027 $user->setLoaded( true );
2029 $user =& $userInput;
# Site configuration
2032 $this->mUseTeX
= $wgUseTeX;
2033 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
2034 $this->mUseDynamicDates
= $wgUseDynamicDates;
2035 $this->mInterwikiMagic
= $wgInterwikiMagic;
2036 $this->mAllowExternalImages
= $wgAllowExternalImages;
# Per-user preferences
2037 $this->mSkin
=& $user->getSkin();
2038 $this->mDateFormat
= $user->getOption( "date" );
2039 $this->mEditSection
= $user->getOption( "editsection" );
2040 $this->mEditSectionOnRightClick
= $user->getOption( "editsectiononrightclick" );
2041 $this->mNumberHeadings
= $user->getOption( "numberheadings" );
2042 $this->mShowToc
= $user->getOption( "showtoc" );
2048 # Regex callbacks, used in Parser::replaceVariables
# Free-function trampoline: forwards the regex match to braceSubstitution() on
# the parser currently stored in the global $wgCurParser.
#   $matches - match array supplied by the regex callback mechanism
2049 function wfBraceSubstitution( $matches )
2051 global $wgCurParser;
2052 return $wgCurParser->braceSubstitution( $matches );