3 // require_once('Tokenizer.php');
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
14 # Processes wiki markup
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
28 # * only within ParserOptions
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
40 define( "MAX_INCLUDE_REPEAT", 5 );
42 # Allowed values for $mOutputType
43 define( "OT_HTML", 1 );
44 define( "OT_WIKI", 2 );
45 define( "OT_MSG", 3 );
47 # string parameter for extractTags which will cause it
48 # to strip HTML comments in addition to regular
49 # <XML>-style tags. This should not be anything we
50 # may want to use in wikisyntax
51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
53 # prefix for escaping, used in two functions at least
54 define( "UNIQ_PREFIX", "NaodW29");
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
63 var $mOptions, $mTitle, $mOutputType;
72 $this->mOutput
= new ParserOutput
;
73 $this->mAutonumber
= 0;
74 $this->mLastSection
= "";
75 $this->mDTopen
= false;
76 $this->mVariables
= false;
77 $this->mIncludeCount
= array();
78 $this->mStripState
= array();
79 $this->mArgStack
= array();
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
85 # Returns a ParserOutput
87 function parse( $text, &$title, $options, $linestart = true, $clearState = true )
90 $fname = "Parser::parse";
91 wfProfileIn( $fname );
97 $this->mOptions
= $options;
98 $this->mTitle
=& $title;
99 $this->mOutputType
= OT_HTML
;
102 $text = $this->strip( $text, $this->mStripState
);
103 $text = $this->internalParse( $text, $linestart );
104 $text = $this->unstrip( $text, $this->mStripState
);
105 # Clean up special characters, only run once, next-to-last before doBlockLevels
108 # french spaces, last one Guillemet-left
109 # only if there is something before the space
110 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1 \\2",
111 # french spaces, Guillemet-right
112 "/(\\302\\253) /i"=>"\\1 ",
113 "/<hr *>/i" => '<hr />',
114 "/<br *>/i" => '<br />',
115 "/<center *>/i"=>'<div class="center">',
116 "/<\\/center *>/i" => '</div>',
117 # Clean up spare ampersands; note that we probably ought to be
118 # more careful about named entities.
119 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
121 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
124 # french spaces, last one Guillemet-left
125 "/ (\\?|:|!|\\302\\273)/i"=>" \\1",
126 # french spaces, Guillemet-right
127 "/(\\302\\253) /i"=>"\\1 ",
128 "/<center *>/i"=>'<div class="center">',
129 "/<\\/center *>/i" => '</div>'
131 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
134 $text = $this->doBlockLevels( $text, $linestart );
136 $text = $this->tidy($text);
138 $this->mOutput
->setText( $text );
139 wfProfileOut( $fname );
140 return $this->mOutput
;
# Builds a pseudo-random hexadecimal token by joining the hex forms of two
# separate 31-bit mt_rand() draws. extractTags() and insertStripItem() append
# it to their marker prefixes. mt_rand() is not a cryptographic generator.
/* static */ function getRandomString()
{
    $firstHalf = dechex( mt_rand( 0, 0x7fffffff ) );
    $secondHalf = dechex( mt_rand( 0, 0x7fffffff ) );
    return $firstHalf . $secondHalf;
}
148 # Replaces all occurrences of <$tag>content</$tag> in the text
149 # with a random marker and returns the new text. The output parameter
150 # $content will be an associative array filled with data of the form
151 # $unique_marker => content.
153 # If $content is already set, the additional entries will be appended
155 # If $tag is set to STRIP_COMMENTS, the function will extract
156 # <!-- HTML comments -->
158 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
159 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
166 while ( "" != $text ) {
167 if($tag==STRIP_COMMENTS
) {
168 $p = preg_split( "/<!--/i", $text, 2 );
170 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
173 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) {
176 if($tag==STRIP_COMMENTS
) {
177 $q = preg_split( "/-->/i", $p[1], 2 );
179 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
181 $marker = $rnd . sprintf("%08X", $n++
);
182 $content[$marker] = $q[0];
183 $stripped .= $marker;
190 # Strips and renders <nowiki>, <pre>, <math>, <hiero>, <timeline>
191 # If $render is set, performs necessary rendering operations on plugins
192 # Returns the text, and fills an array with data needed in unstrip()
193 # If the $state is already a valid strip state, it adds to the state
195 # When $stripcomments is set, HTML comments <!-- like this -->
196 # will be stripped in addition to other tags. This is important
197 # for section editing, where these comments cause confusion when
198 # counting the sections in the wikisource
199 function strip( $text, &$state, $stripcomments = false )
201 $render = ($this->mOutputType
== OT_HTML
);
202 $nowiki_content = array();
203 $hiero_content = array();
204 $timeline_content = array();
205 $math_content = array();
206 $pre_content = array();
207 $comment_content = array();
209 # Replace any instances of the placeholders
210 $uniq_prefix = UNIQ_PREFIX
;
211 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
213 $text = Parser
::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
214 foreach( $nowiki_content as $marker => $content ){
216 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
218 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
222 $text = Parser
::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
223 foreach( $hiero_content as $marker => $content ){
224 if( $render && $GLOBALS['wgUseWikiHiero']){
225 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML
);
227 $hiero_content[$marker] = "<hiero>$content</hiero>";
231 $text = Parser
::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
232 foreach( $timeline_content as $marker => $content ){
233 if( $render && $GLOBALS['wgUseTimeline']){
234 $timeline_content[$marker] = renderTimeline( $content );
236 $timeline_content[$marker] = "<timeline>$content</timeline>";
240 $text = Parser
::extractTags("math", $text, $math_content, $uniq_prefix);
241 foreach( $math_content as $marker => $content ){
243 if( $this->mOptions
->getUseTeX() ) {
244 $math_content[$marker] = renderMath( $content );
246 $math_content[$marker] = "<math>$content<math>";
249 $math_content[$marker] = "<math>$content</math>";
253 $text = Parser
::extractTags("pre", $text, $pre_content, $uniq_prefix);
254 foreach( $pre_content as $marker => $content ){
256 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
258 $pre_content[$marker] = "<pre>$content</pre>";
262 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
263 foreach( $comment_content as $marker => $content ){
264 $comment_content[$marker] = "<!--$content-->";
268 # Merge state with the pre-existing state, if there is one
270 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
271 $state['hiero'] = $state['hiero'] +
$hiero_content;
272 $state['timeline'] = $state['timeline'] +
$timeline_content;
273 $state['math'] = $state['math'] +
$math_content;
274 $state['pre'] = $state['pre'] +
$pre_content;
275 $state['comment'] = $state['comment'] +
$comment_content;
278 'nowiki' => $nowiki_content,
279 'hiero' => $hiero_content,
280 'timeline' => $timeline_content,
281 'math' => $math_content,
282 'pre' => $pre_content,
283 'comment' => $comment_content
# Puts back the content that was replaced by unique markers.
#
# $text  - text containing strip markers
# $state - strip state: an array of groups, each mapping marker => content
#
# Groups, and the entries inside each group, must be expanded in reverse
# order, otherwise nested tags will be corrupted.
#
# Iterates with foreach over reversed copies instead of end()/prev(), so a
# false-valued entry cannot end the loop early and the caller's internal
# array pointer is left alone.
#
# Returns the text with every known marker replaced by its content.
function unstrip( $text, &$state )
{
    if ( !is_array( $state ) ) {
        # Nothing has been stripped yet
        return $text;
    }

    foreach ( array_reverse( $state, true ) as $contentDict ) {
        if ( !is_array( $contentDict ) ) {
            continue;
        }
        foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
            # Array keys that look numeric come back as integers
            $text = str_replace( (string)$marker, $content, $text );
        }
    }

    return $text;
}
302 # Add an item to the strip state
303 # Returns the unique tag which must be inserted into the stripped text
304 # The tag will be replaced with the original text in unstrip()
306 function insertStripItem( $text, &$state )
308 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
317 $state['item'][$rnd] = $text;
321 # This method generates the list of subcategories and pages for a category
322 function categoryMagic ()
324 global $wgLang , $wgUser ;
325 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
327 $cns = Namespace::getCategory() ;
328 if ( $this->mTitle
->getNamespace() != $cns ) return "" ; # This ain't a category page
330 $r = "<br style=\"clear:both;\"/>\n";
333 $sk =& $wgUser->getSkin() ;
335 $articles = array() ;
336 $children = array() ;
338 $id = $this->mTitle
->getArticleID() ;
341 $t = wfStrencode( $this->mTitle
->getDBKey() );
342 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
343 $res = wfQuery ( $sql, DB_READ
) ;
344 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
346 # For all pages that link to this category
347 foreach ( $data AS $x )
349 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
350 if ( $t != "" ) $t .= ":" ;
351 $t .= $x->cur_title
;
353 if ( $x->cur_namespace
== $cns ) {
354 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
356 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
359 wfFreeResult ( $res ) ;
361 # Showing subcategories
362 if ( count ( $children ) > 0 ) {
363 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
364 $r .= implode ( ", " , $children ) ;
367 # Showing pages in this category
368 if ( count ( $articles ) > 0 ) {
369 $ti = $this->mTitle
->getText() ;
370 $h = wfMsg( "category_header", $ti );
371 $r .= "<h2>{$h}</h2>\n" ;
372 $r .= implode ( ", " , $articles ) ;
# Returns the list of attribute names allowed on user-supplied HTML tags
# (no scripting, etc.). fixTagAttributes() checks each attribute name
# against this list with in_array().
#
# The order of the original list is preserved, including the second
# "width" entry in the table group.
function getHTMLattrs ()
{
    $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
    $lineBreakAndRule = array( "clear" /* BR */, "noshade" /* HR */ );
    $quotation = array( "cite" /* BLOCKQUOTE, Q */ );
    $font = array( "size", "face", "color" /* FONT */ );
    # For various lists, mostly deprecated but safe
    $lists = array( "type", "start", "value", "compact" );
    $tables = array(
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan"
    );
    # For CSS
    $styling = array( "id", "class", "name", "style" );

    return array_merge(
        $general, $lineBreakAndRule, $quotation, $font, $lists, $tables, $styling
    );
}
396 function fixTagAttributes ( $t )
398 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
399 $htmlattrs = $this->getHTMLattrs() ;
401 # Strip non-approved attributes from the tag
403 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
404 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
406 # Strip javascript "expression" from stylesheets. Brute force approach:
407 # If anything offensive is found, all attributes of the HTML tag are dropped
410 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
411 wfMungeToUtf8( $t ) ) )
419 /* interface with html tidy, used if $wgUseTidy = true */
420 function tidy ( $text ) {
421 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
422 global $wgInputEncoding, $wgOutputEncoding;
423 $fname = "Parser::tidy";
424 wfProfileIn( $fname );
427 switch(strtoupper($wgOutputEncoding)) {
429 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
432 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
435 $wgTidyOpts .= ' -raw';
438 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
439 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
440 '<head><title>test</title></head><body>'.$text.'</body></html>';
441 $descriptorspec = array(
442 0 => array("pipe", "r"),
443 1 => array("pipe", "w"),
444 2 => array("file", "/dev/null", "a")
446 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
447 if (is_resource($process)) {
448 fwrite($pipes[0], $text);
450 while (!feof($pipes[1])) {
451 $cleansource .= fgets($pipes[1], 1024);
454 $return_value = proc_close($process);
457 wfProfileOut( $fname );
459 if( $cleansource == '' && $text != '') {
460 wfDebug( "Tidy error detected!\n" );
461 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
467 function doTableStuff ( $t )
469 $t = explode ( "\n" , $t ) ;
470 $td = array () ; # Is currently a td tag open?
471 $ltd = array () ; # Was it TD or TH?
472 $tr = array () ; # Is currently a tr tag open?
473 $ltr = array () ; # tr attributes
474 foreach ( $t AS $k => $x )
477 $fc = substr ( $x , 0 , 1 ) ;
478 if ( "{|" == substr ( $x , 0 , 2 ) )
480 $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
481 array_push ( $td , false ) ;
482 array_push ( $ltd , "" ) ;
483 array_push ( $tr , false ) ;
484 array_push ( $ltr , "" ) ;
486 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
487 else if ( "|}" == substr ( $x , 0 , 2 ) )
490 $l = array_pop ( $ltd ) ;
491 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
492 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
496 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
498 $z = trim ( substr ( $x , 2 ) ) ;
499 $t[$k] = "<caption>{$z}</caption>\n" ;
501 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
503 $x = substr ( $x , 1 ) ;
504 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
506 $l = array_pop ( $ltd ) ;
507 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
508 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
511 array_push ( $tr , false ) ;
512 array_push ( $td , false ) ;
513 array_push ( $ltd , "" ) ;
514 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
516 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
518 if ( "|+" == substr ( $x , 0 , 2 ) )
521 $x = substr ( $x , 1 ) ;
523 $after = substr ( $x , 1 ) ;
524 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
525 $after = explode ( "||" , $after ) ;
527 foreach ( $after AS $theline )
532 $tra = array_pop ( $ltr ) ;
533 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
534 array_push ( $tr , true ) ;
535 array_push ( $ltr , "" ) ;
538 $l = array_pop ( $ltd ) ;
539 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
540 if ( $fc == "|" ) $l = "td" ;
541 else if ( $fc == "!" ) $l = "th" ;
542 else if ( $fc == "+" ) $l = "caption" ;
544 array_push ( $ltd , $l ) ;
545 $y = explode ( "|" , $theline , 2 ) ;
546 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
547 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
549 array_push ( $td , true ) ;
554 # Closing open td, tr && table
555 while ( count ( $td ) > 0 )
557 if ( array_pop ( $td ) ) $t[] = "</td>" ;
558 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
562 $t = implode ( "\n" , $t ) ;
563 # $t = $this->removeHTMLtags( $t );
567 # Parses the text and adds the result to the strip state
568 # Returns the strip tag
569 function stripParse( $text, $linestart, $args )
571 $text = $this->strip( $text, $this->mStripState
);
572 $text = $this->internalParse( $text, $linestart, $args, false );
574 $text = "\n" . $text;
576 return $this->insertStripItem( $text, $this->mStripState
);
579 function internalParse( $text, $linestart, $args = array(), $isMain=true )
581 $fname = "Parser::internalParse";
582 wfProfileIn( $fname );
584 $text = $this->removeHTMLtags( $text );
585 $text = $this->replaceVariables( $text, $args );
587 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
589 $text = $this->doHeadings( $text );
590 if($this->mOptions
->getUseDynamicDates()) {
591 global $wgDateFormatter;
592 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
594 $text = $this->doAllQuotes( $text );
595 $text = $this->replaceExternalLinks( $text );
596 $text = $this->replaceInternalLinks ( $text );
597 $text = $this->replaceInternalLinks ( $text );
598 //$text = $this->doTokenizedParser ( $text );
599 $text = $this->doTableStuff ( $text ) ;
600 $text = $this->magicISBN( $text );
601 $text = $this->magicRFC( $text );
602 $text = $this->formatHeadings( $text, $isMain );
603 $sk =& $this->mOptions
->getSkin();
604 $text = $sk->transformContent( $text );
606 if ( !isset ( $this->categoryMagicDone
) ) {
607 $text .= $this->categoryMagic () ;
608 $this->categoryMagicDone
= true ;
611 wfProfileOut( $fname );
# Turns wiki heading lines ("= Title =" up to "====== Title ======") into
# <h1> .. <h6> elements. Levels are processed from six markers down to one,
# so the longest run of "=" is matched first.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
# Runs doQuotes() on every line of $text separately, so quote markup is
# handled one line at a time, then joins the converted lines with newlines.
/* private */ function doAllQuotes( $text )
{
    $converted = array();
    foreach ( explode( "\n", $text ) as $line ) {
        $converted[] = $this->doQuotes( "", $line, "" );
    }
    return implode( "\n", $converted );
}
636 /* private */ function doQuotes( $pre, $text, $mode )
638 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
639 $m1_strong = ($m[1] == "") ?
"" : "<strong>{$m[1]}</strong>";
640 $m1_em = ($m[1] == "") ?
"" : "<em>{$m[1]}</em>";
641 if ( substr ($m[2], 0, 1) == "'" ) {
642 $m[2] = substr ($m[2], 1);
644 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "emstrong" );
645 } else if ($mode == "strong") {
646 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
647 } else if (($mode == "emstrong") ||
($mode == "both")) {
648 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
649 } else if ($mode == "strongem") {
650 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
652 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
655 if ($mode == "strong") {
656 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "strongem" );
657 } else if ($mode == "em") {
658 return $m1_em . $this->doQuotes ( "", $m[2], "" );
659 } else if ($mode == "emstrong") {
660 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
661 } else if (($mode == "strongem") ||
($mode == "both")) {
662 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
664 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
668 $text_strong = ($text == "") ?
"" : "<strong>{$text}</strong>";
669 $text_em = ($text == "") ?
"" : "<em>{$text}</em>";
672 } else if ($mode == "em") {
673 return $pre . $text_em;
674 } else if ($mode == "strong") {
675 return $pre . $text_strong;
676 } else if ($mode == "strongem") {
677 return (($pre == "") && ($text == "")) ?
"" : "<strong>{$pre}{$text_em}</strong>";
679 return (($pre == "") && ($text == "")) ?
"" : "<em>{$pre}{$text_strong}</em>";
684 # Note: we have to do external links before the internal ones,
685 # and otherwise take great care in the order of things here, so
686 # that we don't end up interpreting some URLs twice.
# Converts external links for every supported protocol by passing the text
# through subReplaceExternalLinks() once per protocol, in a fixed order.
/* private */ function replaceExternalLinks( $text )
{
    $fname = "Parser::replaceExternalLinks";
    wfProfileIn( $fname );

    # protocol => value of the $autonumber argument given to
    # subReplaceExternalLinks(); true only for http and https
    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $fname );
    return $text;
}
703 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
705 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
706 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
708 # this is the list of separators that should be ignored if they
709 # are the last character of an URL but that should be included
710 # if they occur within the URL, e.g. "go to www.foo.com, where .."
711 # in this case, the last comma should not become part of the URL,
712 # but in "www.foo.com/123,2342,32.htm" it should.
714 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
715 $images = "gif|png|jpg|jpeg";
717 # PLEASE NOTE: The curly braces { } are not part of the regex,
718 # they are interpreted as part of the string (used to tell PHP
719 # that the content of the string should be inserted there).
720 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
721 "((?i){$images})([^{$uc}]|$)/";
723 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
724 $sk =& $this->mOptions
->getSkin();
726 if ( $autonumber and $this->mOptions
->getAllowExternalImages() ) { # Use img tags only for HTTP urls
727 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
728 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
730 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
731 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
732 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
734 $s = str_replace( $unique, $protocol, $s );
736 $a = explode( "[{$protocol}:", " " . $s );
737 $s = array_shift( $a );
738 $s = substr( $s, 1 );
740 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
741 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
743 foreach ( $a as $line ) {
744 if ( preg_match( $e1, $line, $m ) ) {
745 $link = "{$protocol}:{$m[1]}";
747 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
748 else { $text = wfEscapeHTML( $link ); }
749 } else if ( preg_match( $e2, $line, $m ) ) {
750 $link = "{$protocol}:{$m[1]}";
754 $s .= "[{$protocol}:" . $line;
757 if( $link == $text ||
preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
760 # Expand the URL for printable version
761 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
763 $la = $sk->getExternalLinkAttributes( $link, $text );
764 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
771 /* private */ function replaceInternalLinks( $s )
773 global $wgLang, $wgLinkCache;
774 global $wgNamespacesWithSubpages, $wgLanguageCode;
775 static $fname = "Parser::replaceInternalLink" ;
776 wfProfileIn( $fname );
778 wfProfileIn( "$fname-setup" );
780 # the % is needed to support urlencoded titles as well
781 if ( !$tc ) { $tc = Title
::legalChars() . "#%"; }
782 $sk =& $this->mOptions
->getSkin();
784 $a = explode( "[[", " " . $s );
785 $s = array_shift( $a );
786 $s = substr( $s, 1 );
788 # Match a link having the form [[namespace:link|alternate]]trail
790 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
791 # Match the end of a line for a word that's not followed by whitespace,
792 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
793 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
794 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
795 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
798 # Special and Media are pseudo-namespaces; no pages actually exist in them
799 static $image = FALSE;
800 static $special = FALSE;
801 static $media = FALSE;
802 static $category = FALSE;
803 if ( !$image ) { $image = Namespace::getImage(); }
804 if ( !$special ) { $special = Namespace::getSpecial(); }
805 if ( !$media ) { $media = Namespace::getMedia(); }
806 if ( !$category ) { $category = Namespace::getCategory(); }
808 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
810 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
817 wfProfileOut( "$fname-setup" );
819 foreach ( $a as $line ) {
820 $prefix = $new_prefix;
822 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
824 # fix up urlencoded title texts
825 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
827 } else { # Invalid form; output directly
828 $s .= $prefix . "[[" . $line ;
829 wfProfileOut( $fname );
835 :Foobar -- override special treatment of prefix (images, language links)
836 /Foobar -- convert to CurrentPage/Foobar
837 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
839 $c = substr($m[1],0,1);
840 $noforce = ($c != ":");
841 if( $c == "/" ) { # subpage
842 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
843 $m[1]=substr($m[1],1,strlen($m[1])-2);
846 $noslash=substr($m[1],1);
848 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) { # subpages allowed here
849 $link = $this->mTitle
->getPrefixedText(). "/" . trim($noslash);
852 } # this might be changed for ugliness reasons
854 $link = $noslash; # no subpage allowed, use standard link
856 } elseif( $noforce ) { # no subpage
859 $link = substr( $m[1], 1 );
861 $wasblank = ( "" == $text );
865 $nt = Title
::newFromText( $link );
867 $s .= $prefix . "[[" . $line;
868 wfProfileOut( $fname );
871 $ns = $nt->getNamespace();
872 $iw = $nt->getInterWiki();
874 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
875 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
876 $tmp = $prefix . $trail ;
877 wfProfileOut( $fname );
878 $s .= (trim($tmp) == '')?
'': $tmp;
881 if ( $ns == $image ) {
882 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
883 $wgLinkCache->addImageLinkObj( $nt );
884 wfProfileOut( $fname );
887 if ( $ns == $category ) {
888 $t = $nt->getText() ;
889 $nnt = Title
::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
891 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
892 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
893 $wgLinkCache->resume();
895 $sortkey = $wasblank ?
$this->mTitle
->getPrefixedText() : $text;
896 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
897 $this->mOutput
->mCategoryLinks
[] = $t ;
898 $s .= $prefix . $trail ;
899 wfProfileOut( $fname );
903 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
904 ( strpos( $link, "#" ) == FALSE ) ) {
905 # Self-links are handled specially; generally de-link and change to bold.
906 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
907 wfProfileOut( $fname );
911 if( $ns == $media ) {
912 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
913 $wgLinkCache->addImageLinkObj( $nt );
914 wfProfileOut( $fname );
916 } elseif( $ns == $special ) {
917 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
918 wfProfileOut( $fname );
921 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
923 wfProfileOut( $fname );
927 # Some functions here used by doBlockLevels()
# Ends the block recorded in $this->mLastSection, if there is one.
# Returns the closing tag followed by a newline, or "" when nothing is open.
# Always clears $this->mInPre and $this->mLastSection.
/* private */ function closeParagraph()
{
    $closingTag = ( '' != $this->mLastSection )
        ? "</" . $this->mLastSection . ">\n"
        : "";

    $this->mInPre = false;
    $this->mLastSection = "";

    return $closingTag;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes that are identical in $st1
# and $st2. Returns 0 when either string is empty or the first bytes differ.
#
# Uses [] string offsets: the {} offset syntax is deprecated in PHP 7.4 and
# is a parse error in PHP 8.
/* private */ function getCommon( $st1, $st2 )
{
    $fl = strlen( $st1 );
    $shorter = strlen( $st2 );
    if ( $fl < $shorter ) { $shorter = $fl; }

    # Only the overlap of the two strings can match
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
953 # These next three functions open, continue, and close the list
954 # element appropriate to the prefix character passed into them.
# Opens the list element that belongs to the prefix character $char.
# Any open paragraph is closed first through closeParagraph(). Opening a
# ";" item also sets $this->mDTopen. An unknown character gives only an
# error comment; the closeParagraph() output is discarded in that case.
/* private */ function openList( $char )
{
    $html = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            $html .= "<ul><li>";
            break;
        case "#":
            $html .= "<ol><li>";
            break;
        case ":":
            $html .= "<dl><dd>";
            break;
        case ";":
            $html .= "<dl><dt>";
            $this->mDTopen = true;
            break;
        default:
            $html = "<!-- ERR 1 -->";
    }

    return $html;
}
# Closes the current list item and opens the next one at the same level.
#
# $char - prefix character of the new item: "*", "#", ":" or ";"
#
# For "*" and "#" this is simply "</li><li>". In definition lists the
# closing tag depends on what is open: "</dt>" when $this->mDTopen is set,
# otherwise "</dd>". $close is set up front so it is never undefined when
# no <dt> is open. $this->mDTopen is then updated for the element just
# opened.
#
# Returns the HTML fragment, or an error comment for an unknown character.
/* private */ function nextItem( $char )
{
    if ( "*" == $char || "#" == $char ) {
        return "</li><li>";
    }

    if ( ":" == $char || ";" == $char ) {
        $close = "</dd>";
        if ( $this->mDTopen ) { $close = "</dt>"; }

        if ( ";" == $char ) {
            $this->mDTopen = true;
            return $close . "<dt>";
        }
        $this->mDTopen = false;
        return $close . "<dd>";
    }

    return "<!-- ERR 2 -->";
}
989 /* private */function closeList( $char )
991 if ( "*" == $char ) { $text = "</li></ul>"; }
992 else if ( "#" == $char ) { $text = "</li></ol>"; }
993 else if ( ":" == $char ) {
994 if ( $this->mDTopen
) {
995 $this->mDTopen
= false;
996 $text = "</dt></dl>";
998 $text = "</dd></dl>";
1001 else { return "<!-- ERR 3 -->"; }
1005 /* private */ function doBlockLevels( $text, $linestart ) {
1006 $fname = "Parser::doBlockLevels";
1007 wfProfileIn( $fname );
1009 # Parsing through the text line by line. The main thing
1010 # happening here is handling of block-level elements p, pre,
1011 # and making lists from lines starting with * # : etc.
1013 $textLines = explode( "\n", $text );
1015 $lastPrefix = $output = $lastLine = '';
1016 $this->mDTopen
= $inBlockElem = false;
1018 $paragraphStack = false;
1020 if ( !$linestart ) {
1021 $output .= array_shift( $textLines );
1023 foreach ( $textLines as $oLine ) {
1024 $lastPrefixLength = strlen( $lastPrefix );
1025 $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1026 $preOpenMatch = preg_match("/<pre/i", $oLine );
1027 if (!$this->mInPre
) {
1028 $this->mInPre
= !empty($preOpenMatch);
1030 if ( !$this->mInPre
) {
1031 # Multiple prefixes may abut each other for nested lists.
1032 $prefixLength = strspn( $oLine, "*#:;" );
1033 $pref = substr( $oLine, 0, $prefixLength );
1036 $pref2 = str_replace( ";", ":", $pref );
1037 $t = substr( $oLine, $prefixLength );
1039 # Don't interpret any other prefixes in preformatted text
1041 $pref = $pref2 = '';
1046 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1047 # Same as the last item, so no need to deal with nesting or opening stuff
1048 $output .= $this->nextItem( substr( $pref, -1 ) );
1049 $paragraphStack = false;
1051 if ( ";" == substr( $pref, -1 ) ) {
1052 # The one nasty exception: definition lists work like this:
1053 # ; title : definition text
1054 # So we check for : in the remainder text to split up the
1055 # title and definition, without b0rking links.
1056 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1057 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1059 $output .= $term . $this->nextItem( ":" );
1063 } elseif( $prefixLength ||
$lastPrefixLength ) {
1064 # Either open or close a level...
1065 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1066 $paragraphStack = false;
1068 while( $commonPrefixLength < $lastPrefixLength ) {
1069 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1070 --$lastPrefixLength;
1072 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1073 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1075 while ( $prefixLength > $commonPrefixLength ) {
1076 $char = substr( $pref, $commonPrefixLength, 1 );
1077 $output .= $this->openList( $char );
1079 if ( ";" == $char ) {
1080 # FIXME: This is dupe of code above
1081 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1083 $output .= $term . $this->nextItem( ":" );
1087 ++
$commonPrefixLength;
1089 $lastPrefix = $pref2;
1091 if( 0 == $prefixLength ) {
1092 # No prefix (not in list)--go to paragraph mode
1093 $uniq_prefix = UNIQ_PREFIX
;
1094 // XXX: use a stack for nestable elements like span, table and div
1095 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1096 $closematch = preg_match(
1097 "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1098 "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1099 if ( $openmatch or $closematch ) {
1100 $paragraphStack = false;
1101 $output .= $this->closeParagraph();
1102 if($preOpenMatch and !$preCloseMatch) {
1103 $this->mInPre
= true;
1105 if ( $closematch ) {
1106 $inBlockElem = false;
1108 $inBlockElem = true;
1110 } else if ( !$inBlockElem && !$this->mInPre
) {
1111 if ( " " == $t{0} and trim($t) != '' ) {
1113 if ($this->mLastSection
!= 'pre') {
1114 $paragraphStack = false;
1115 $output .= $this->closeParagraph().'<pre>';
1116 $this->mLastSection
= 'pre';
1120 if ( '' == trim($t) ) {
1121 if ( $paragraphStack ) {
1122 $output .= $paragraphStack.'<br />';
1123 $paragraphStack = false;
1124 $this->mLastSection
= 'p';
1126 if ($this->mLastSection
!= 'p' ) {
1127 $output .= $this->closeParagraph();
1128 $this->mLastSection
= '';
1129 $paragraphStack = "<p>";
1131 $paragraphStack = '</p><p>';
1135 if ( $paragraphStack ) {
1136 $output .= $paragraphStack;
1137 $paragraphStack = false;
1138 $this->mLastSection
= 'p';
1139 } else if ($this->mLastSection
!= 'p') {
1140 $output .= $this->closeParagraph().'<p>';
1141 $this->mLastSection
= 'p';
1147 if ($paragraphStack === false) {
1151 while ( $prefixLength ) {
1152 $output .= $this->closeList( $pref2{$prefixLength-1} );
1155 if ( "" != $this->mLastSection
) {
1156 $output .= "</" . $this->mLastSection
. ">";
1157 $this->mLastSection
= "";
1160 wfProfileOut( $fname );
# Return the replacement text for one built-in magic variable.
#   $index - a MAG_* identifier such as MAG_CURRENTMONTHNAME or MAG_CURRENTTIME.
# The visible branches produce localized month and weekday names and the
# current time through $wgLang, the page title text and namespace name
# through $this->mTitle, and the article count through wfNumberOfArticles().
# NOTE(review): several case labels and return statements of this switch are
# absent from this listing, so the label-to-return pairing cannot be fully
# verified here.
1164 function getVariableValue( $index ) {
1165 global $wgLang, $wgSitename, $wgServer;
1168 case MAG_CURRENTMONTH
:
1170 case MAG_CURRENTMONTHNAME
:
1171 return $wgLang->getMonthName( date("n") );
1172 case MAG_CURRENTMONTHNAMEGEN
:
1173 return $wgLang->getMonthNameGen( date("n") );
1174 case MAG_CURRENTDAY
:
1177 return $this->mTitle
->getText();
1179 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1180 return $wgLang->getNsText($this->mTitle
->getNamespace()); // Patch by Dori
1181 case MAG_CURRENTDAYNAME
:
# date("w") yields 0 (Sunday) through 6, so 1 is added before the lookup;
# presumably getWeekdayName() expects a 1-based index -- TODO confirm.
1182 return $wgLang->getWeekdayName( date("w")+
1 );
1183 case MAG_CURRENTYEAR
:
1185 case MAG_CURRENTTIME
:
1186 return $wgLang->time( wfTimestampNow(), false );
1187 case MAG_NUMBEROFARTICLES
:
1188 return wfNumberOfArticles();
# Rebuild the variable table consulted by the substitution callbacks.
# $this->mVariables is reset to an empty array; then, for every ID listed in
# $wgVariableIDs, the matching magic word receives the table together with
# the value computed by getVariableValue() through its addToArray() method.
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
    }
}
# Expand {{variable}}, {{{argument}}} and {{template}} constructs in $text.
#   $text - wiki text to process
#   $args - arguments of the enclosing template call; pushed on
#           $this->mArgStack for the duration of the call and popped afterwards
# Each pass is a preg_replace_callback() that routes to the global wf*
# callback functions, which reach this parser through $GLOBALS['wgCurParser'].
# The variable and argument passes are entered only when the output type is
# OT_HTML.
# NOTE(review): the closing brace of that OT_HTML branch and the final return
# are not visible in this listing, so whether the template pass also sits
# inside the branch cannot be confirmed from here.
1208 /* private */ function replaceVariables( $text, $args = array() )
1210 global $wgLang, $wgScript, $wgArticlePath;
1212 $fname = "Parser::replaceVariables";
1213 wfProfileIn( $fname );
# Build the variable table lazily on first use
1216 if ( !$this->mVariables
) {
1217 $this->initialiseVariables();
1219 $titleChars = Title
::legalChars();
# Same character class with the braces removed, so a variable name cannot span a brace
1220 $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );
1222 # This function is called recursively. To keep track of arguments we need a stack:
1223 array_push( $this->mArgStack
, $args );
1225 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1226 $GLOBALS['wgCurParser'] =& $this;
1229 if ( $this->mOutputType
== OT_HTML
) {
1230 # Variable substitution
1231 $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );
1233 # Argument substitution
1234 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1236 # Template substitution
1237 $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
1238 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1240 array_pop( $this->mArgStack
);
1242 wfProfileOut( $fname );
# Callback for the {{variable}} pass of replaceVariables().
#   $matches - regex match array; [0] is the whole {{...}} text, [1] the name
# A name present in $this->mVariables is replaced by its stored value and the
# output is flagged as containing old-style magic; any other name keeps the
# original matched text.
# NOTE(review): the else line and the return statement are missing from this
# listing; presumably $text is what gets returned.
1246 function variableSubstitution( $matches )
1248 if ( array_key_exists( $matches[1], $this->mVariables
) ) {
1249 $text = $this->mVariables
[$matches[1]];
1250 $this->mOutput
->mContainsOldMagic
= true;
1252 $text = $matches[0];
# Callback for the {{...}} template pass of replaceVariables().
#   $matches - regex match array: [0] whole match, [1] optional leading
#              newline, [2] text before the first |, [3] the |-separated rest
# The code below tries, in this order: SUBST handling, the MSGNW / MSG / INT
# prefixes, NS: namespace expansion, LOCALURL / LOCALURLE, built-in
# variables, arguments supplied by the caller, and finally loading the page
# from the template namespace. For HTML output a found result is either
# escaped with wfEscapeWikiText() or parsed recursively with stripParse().
# NOTE(review): this listing lacks the lines that assign $found, $nowiki,
# $noparse, $func and $index and the lines that close most branches, so the
# exact nesting cannot be verified from here.
1257 function braceSubstitution( $matches )
1259 global $wgLinkCache, $wgLang;
1260 $fname = "Parser::braceSubstitution";
1267 # $newline is an optional newline character before the braces
1268 # $part1 is the bit before the first |, and must contain only title characters
1269 # $args is a list of arguments, starting from index 0, not including $part1
1271 $newline = $matches[1];
1272 $part1 = $matches[2];
1273 # If the third subpattern matched anything, it will start with |
1274 if ( $matches[3] !== "" ) {
1275 $args = explode( "|", substr( $matches[3], 1 ) );
1279 $argc = count( $args );
# A match that still contains a triple brace is kept as it was
1282 if ( strpos( $matches[0], "{{{" ) !== false ) {
1283 $text = $matches[0];
1290 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1291 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1292 if ( $this->mOutputType
!= OT_WIKI
) {
1293 # Invalid SUBST not replaced at PST time
1294 # Return without further processing
1295 $text = $matches[0];
1299 } elseif ( $this->mOutputType
== OT_WIKI
) {
1300 # SUBST not found in PST pass, do nothing
1301 $text = $matches[0];
1306 # MSG, MSGNW and INT
1309 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1310 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1313 # Remove obsolete MSG:
1314 $mwMsg =& MagicWord
::get( MAG_MSG
);
1315 $mwMsg->matchStartAndRemove( $part1 );
1318 # Check if it is an internal message
1319 $mwInt =& MagicWord
::get( MAG_INT
);
1320 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
# Internal messages are counted under an "int:" key for the inclusion limit
1321 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1322 $text = wfMsgReal( $part1, $args, true );
1330 # Check for NS: (namespace expansion)
1331 $mwNs = MagicWord
::get( MAG_NS
);
1332 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero numeric argument is used directly as the namespace index;
# anything else is looked up as a lower-cased canonical namespace name
1333 if ( intval( $part1 ) ) {
1334 $text = $wgLang->getNsText( intval( $part1 ) );
1337 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1338 if ( !is_null( $index ) ) {
1339 $text = $wgLang->getNsText( $index );
1346 # LOCALURL and LOCALURLE
1348 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1349 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
# $func names the Title method that is invoked dynamically further down
1351 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1352 $func = 'getLocalURL';
1353 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1354 $func = 'escapeLocalURL';
1359 if ( $func !== '' ) {
1360 $title = Title
::newFromText( $part1 );
1361 if ( !is_null( $title ) ) {
1363 $text = $title->$func( $args[0] );
1365 $text = $title->$func();
1372 # Internal variables
1373 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1374 $text = $this->mVariables
[$part1];
1376 $this->mOutput
->mContainsOldMagic
= true;
1379 # Arguments input from the caller
1380 $inputArgs = end( $this->mArgStack );
1381 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1382 $text = $inputArgs[$part1];
1386 # Load from database
1388 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1389 if ( !is_null( $title ) && !$title->isExternal() ) {
1390 # Check for excessive inclusion
1391 $dbk = $title->getPrefixedDBkey();
1392 if ( $this->incrementIncludeCount( $dbk ) ) {
1393 $article = new Article( $title );
1394 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1395 if ( $articleContent !== false ) {
1397 $text = $articleContent;
1402 # If the title is valid but undisplayable, make a link to it
1403 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1404 $text = "[[" . $title->getPrefixedText() . "]]";
1410 # Recursive parsing, escaping and link table handling
1411 # Only for HTML output
1412 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1413 $text = wfEscapeWikiText( $text );
1414 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1415 # Clean up argument array
1416 $assocArgs = array();
# Arguments without "=" get consecutive numeric keys; "name=value"
# arguments are stored under the trimmed name
1418 foreach( $args as $arg ) {
1419 $eqpos = strpos( $arg, "=" );
1420 if ( $eqpos === false ) {
1421 $assocArgs[$index++
] = $arg;
1423 $name = trim( substr( $arg, 0, $eqpos ) );
1424 $value = trim( substr( $arg, $eqpos+
1 ) );
1425 if ( $value === false ) {
1428 if ( $name !== false ) {
1429 $assocArgs[$name] = $value;
1434 # Do not enter included links in link table
1435 if ( !is_null( $title ) ) {
1436 $wgLinkCache->suspend();
1439 # Run full parser on the included text
1440 $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
1442 # Resume the link cache and register the inclusion as a link
1443 if ( !is_null( $title ) ) {
1444 $wgLinkCache->resume();
1445 $wgLinkCache->addLinkObj( $title );
1456 # Triple brace replacement -- used for template arguments
# Callback for the {{{argument}}} pass of replaceVariables().
#   $matches - regex match array: [0] whole match, [1] optional leading
#              newline, [2] the argument name (trimmed before lookup)
# The name is looked up in the top entry of $this->mArgStack; a known
# argument is run through stripParse() with an empty argument list, an
# unknown one keeps the original matched text.
# NOTE(review): the return statement is not visible in this listing.
1457 function argSubstitution( $matches )
1459 $newline = $matches[1];
1460 $arg = trim( $matches[2] );
1461 $text = $matches[0];
1462 $inputArgs = end( $this->mArgStack
);
1464 if ( array_key_exists( $arg, $inputArgs ) ) {
1465 $text = $this->stripParse( $inputArgs[$arg], (bool)$newline, array() );
1471 # Returns true if the function is allowed to include this entity
#   $dbk - key identifying the included entity; callers in this file pass a
#          prefixed DB key or an "int:<message>" string
# The per-key counter in $this->mIncludeCount starts at 0, is incremented on
# every call, and the incremented value is compared with MAX_INCLUDE_REPEAT.
# NOTE(review): the return statements of both branches are missing from this
# listing.
1472 function incrementIncludeCount( $dbk )
1474 if ( !array_key_exists( $dbk, $this->mIncludeCount
) ) {
1475 $this->mIncludeCount
[$dbk] = 0;
1477 if ( ++
$this->mIncludeCount
[$dbk] <= MAX_INCLUDE_REPEAT
) {
1485 # Cleans up HTML, removes dangerous tags and attributes
#   $text - text that may contain HTML tags
# The text is split on "<"; each fragment is matched against a tag pattern
# and, when the tag name is in the allowed element list, re-emitted with its
# attributes filtered through fixTagAttributes(). Other fragments are
# appended after a str_replace() on ">". Two loops over the fragments are
# visible: one that tracks open tags on $tagstack / $tablestack and closes
# leftovers, and a simpler one that does no stack tracking.
# NOTE(review): the condition choosing between the two loops, the table tag
# list and many branch bodies are missing from this listing.
# NOTE(review): the str_replace( ">", ">", ... ) calls below replace a
# character with itself as shown; the replacement was probably the entity
# &gt; before this listing was extracted -- confirm against the real file.
1486 /* private */ function removeHTMLtags( $text )
1488 global $wgUseTidy, $wgUserHtml;
1489 $fname = "Parser::removeHTMLtags";
1490 wfProfileIn( $fname );
1493 $htmlpairs = array( # Tags that must be closed
1494 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1495 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1496 "strike", "strong", "tt", "var", "div", "center",
1497 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1498 "ruby", "rt" , "rb" , "rp", "p"
1500 $htmlsingle = array(
1501 "br", "hr", "li", "dt", "dd"
1503 $htmlnest = array( # Tags that can be nested--??
1504 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1505 "dl", "font", "big", "small", "sub", "sup"
1507 $tabletags = array( # Can only appear inside table
# Alternative branch: every list empty, so no tag is treated as allowed
1511 $htmlpairs = array();
1512 $htmlsingle = array();
1513 $htmlnest = array();
1514 $tabletags = array();
1517 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1518 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1520 $htmlattrs = $this->getHTMLattrs () ;
1522 # Remove HTML comments
# NOTE(review): the pattern has a single capture group, so the "$2"
# replacement expands to an empty string and the comment is simply removed.
1523 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1525 $bits = explode( "<", $text );
# The first fragment precedes any "<" and is kept as the start of the output
1526 $text = array_shift( $bits );
1528 $tagstack = array(); $tablestack = array();
1529 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match and list() below and restored afterwards
1530 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1531 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1533 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1534 error_reporting( $prev );
1537 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1541 if ( ! in_array( $t, $htmlsingle ) &&
1542 ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
1543 if(!empty($ot)) array_push( $tagstack, $ot );
# Closing a table restores the tag stack saved when the table was opened
1546 if ( $t == "table" ) {
1547 $tagstack = array_pop( $tablestack );
1552 # Keep track for later
1553 if ( in_array( $t, $tabletags ) &&
1554 ! in_array( "table", $tagstack ) ) {
1556 } else if ( in_array( $t, $tagstack ) &&
1557 ! in_array ( $t , $htmlnest ) ) {
1559 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Opening a table saves the current tag stack and starts a fresh one
1560 if ( $t == "table" ) {
1561 array_push( $tablestack, $tagstack );
1562 $tagstack = array();
1564 array_push( $tagstack, $t );
1566 # Strip non-approved attributes from the tag
1567 $newparams = $this->fixTagAttributes($params);
1571 $rest = str_replace( ">", ">", $rest );
1572 $text .= "<$slash$t $newparams$brace$rest";
1576 $text .= "<" . str_replace( ">", ">", $x);
1578 # Close off any remaining tags
1579 while ( $t = array_pop( $tagstack ) ) {
1581 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1584 # this might be possible using tidy itself
1585 foreach ( $bits as $x ) {
1586 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1588 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1589 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1590 $newparams = $this->fixTagAttributes($params);
1591 $rest = str_replace( ">", ">", $rest );
1592 $text .= "<$slash$t $newparams$brace$rest";
1594 $text .= "<" . str_replace( ">", ">", $x);
1598 wfProfileOut( $fname );
1605 * This function accomplishes several tasks:
1606 * 1) Auto-number headings if that option is enabled
1607 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1608 * 3) Add a Table of contents on the top for users who have enabled the option
1609 * 4) Auto-anchor headings
1611 * It loops through all headlines, collects the necessary data, then splits up the
1612 * string and re-inserts the newly formatted headlines.
# Post-process the headings found in rendered HTML.
#   $text   - HTML containing <h1>..<h6> elements
#   $isMain - tested together with $doShowToc before the table of contents
#             is inserted into the first block
# Settings are read from $this->mOptions (heading numbering, TOC, section
# edit links, right-click editing) and may be overridden by the
# NOEDITSECTION, NOTOC and FORCETOC magic words found in $text.
# First loop: for every heading, compute its numbering, a link-safe anchor
# (duplicates get a "_<count>" suffix), the TOC line and the rebuilt heading
# stored in $head. Second loop: split $text at the headings and walk the
# resulting blocks together with the rebuilt headings.
# NOTE(review): many lines (initialisations, branch bodies, the reassembly
# into the result and the return) are missing from this listing.
1616 /* private */ function formatHeadings( $text, $isMain=true )
1618 global $wgInputEncoding;
1620 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
1621 $doShowToc = $this->mOptions
->getShowToc();
1622 if( !$this->mTitle
->userCanEdit() ) {
1624 $rightClickHack = 0;
1626 $showEditLink = $this->mOptions
->getEditSection();
1627 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
1630 # Inhibit editsection links if requested in the page
1631 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1632 if( $esw->matchAndRemove( $text ) ) {
1635 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1637 $mw =& MagicWord
::get( MAG_NOTOC
);
1638 if( $mw->matchAndRemove( $text ) ) {
1642 # never add the TOC to the Main Page. This is an entry page that should not
1643 # be more than 1-2 screens large anyway
1644 if( $this->mTitle
->getPrefixedText() == wfMsg("mainpage") ) {
1648 # Get all headlines for numbering them and adding funky stuff like [edit]
1649 # links - this is for later, but we need the number of headlines right now
# Captures: [1] heading level digit, [2] rest of the opening tag including ">", [3] heading content
1650 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1652 # if there are fewer than 4 headlines in the article, do not show TOC
1653 if( $numMatches < 4 ) {
1657 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1658 # override above conditions and always show TOC
1659 $mw =& MagicWord
::get( MAG_FORCETOC
);
1660 if ($mw->matchAndRemove( $text ) ) {
1665 # We need this to perform operations on the HTML
1666 $sk =& $this->mOptions
->getSkin();
1671 # Ugh .. the TOC should have neat indentation levels which can be
1672 # passed to the skin functions. These are determined here
1677 $sublevelCount = array();
1680 foreach( $matches[3] as $headline ) {
1683 $prevlevel = $level;
1685 $level = $matches[1][$headlineCount];
1686 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
1687 # reset when we enter a new level
1688 $sublevelCount[$level] = 0;
1689 $toc .= $sk->tocIndent( $level - $prevlevel );
1690 $toclevel +
= $level - $prevlevel;
1692 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
1693 # reset when we step back a level
1694 $sublevelCount[$level+
1]=0;
1695 $toc .= $sk->tocUnindent( $prevlevel - $level );
1696 $toclevel -= $prevlevel - $level;
1698 # count number of headlines for each level
1699 @$sublevelCount[$level]++
;
1700 if( $doNumberHeadings ||
$doShowToc ) {
# Build the numbering from the counters of every level up to this one, skipping empty levels
1702 for( $i = 1; $i <= $level; $i++
) {
1703 if( !empty( $sublevelCount[$i] ) ) {
1707 $numbering .= $sublevelCount[$i];
1713 # The canonized header is a version of the header text safe to use for links
1714 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1715 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
# Drop any markup from the heading text before it is used for the TOC line and anchor
1718 $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1719 $tocline = trim( $canonized_headline );
1720 $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT
, $wgInputEncoding ) ) );
1721 # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1722 $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1723 $refer[$headlineCount] = $canonized_headline;
1725 # count how many in assoc. array so we can track dupes in anchors
1726 @$refers[$canonized_headline]++
;
1727 $refcount[$headlineCount]=$refers[$canonized_headline];
1729 # Prepend the number to the heading text
1731 if( $doNumberHeadings ||
$doShowToc ) {
1732 $tocline = $numbering . " " . $tocline;
1734 # Don't number the heading if it is the only one (looks silly)
1735 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1736 # the two are different if the line contains a link
1737 $headline=$numbering . " " . $headline;
1741 # Create the anchor for linking from the TOC to the section
1742 $anchor = $canonized_headline;
1743 if($refcount[$headlineCount] > 1 ) {
1744 $anchor .= "_" . $refcount[$headlineCount];
1747 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1749 if( $showEditLink ) {
1750 if ( empty( $head[$headlineCount] ) ) {
1751 $head[$headlineCount] = "";
# Edit links are numbered from 1 while $headlineCount starts at 0
1753 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
1756 # Add the edit section span
1757 if( $rightClickHack ) {
1758 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
1761 # give headline the correct <h#> tag
1762 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1768 $toclines = $headlineCount;
1769 $toc .= $sk->tocUnindent( $toclevel );
1770 $toc = $sk->tocTable( $toc );
1773 # split up and insert constructed headlines
1775 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1778 foreach( $blocks as $block ) {
1779 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1780 # This is the [edit] link that appears for the top block of text when
1781 # section editing is enabled
1783 # Disabled because it broke block formatting
1784 # For example, a bullet point in the top line
1785 # $full .= $sk->editSectionLink(0);
1788 if( $doShowToc && !$i && $isMain) {
1789 # Top anchor now in skin
1793 if( !empty( $head[$i] ) ) {
# Turn "ISBN <number>" occurrences in $text into links to the Booksources
# special page.
#   $text - text to scan; returned unchanged when it contains no "ISBN "
# The text is split on "ISBN " (a space is prepended first so that a leading
# occurrence also splits). For each following piece, leading blanks are
# skipped, then characters from $valid are consumed, and hyphens and spaces
# are removed to form the number used in the link query.
# NOTE(review): the loop bodies that accumulate $blank and $isbn, the test
# choosing between the plain-text and link branches, and the return are
# missing from this listing.
# NOTE(review): split() and the $x{0} offset syntax have been removed from
# current PHP versions; this code targets an old runtime.
1802 /* private */ function magicISBN( $text )
1806 $a = split( "ISBN ", " $text" );
1807 if ( count ( $a ) < 2 ) return $text;
# Drop the space that was prepended for the split
1808 $text = substr( array_shift( $a ), 1);
1809 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1811 foreach ( $a as $x ) {
1812 $isbn = $blank = "" ;
1813 while ( " " == $x{0} ) {
1815 $x = substr( $x, 1 );
1817 while ( strstr( $valid, $x{0} ) != false ) {
1819 $x = substr( $x, 1 );
1821 $num = str_replace( "-", "", $isbn );
1822 $num = str_replace( " ", "", $num );
1825 $text .= "ISBN $blank$x";
1827 $titleObj = Title
::makeTitle( NS_SPECIAL
, "Booksources" );
1828 $text .= "<a href=\"" .
1829 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1830 "\" class=\"internal\">ISBN $isbn</a>";
# Turn "RFC <number>" occurrences in $text into external links.
#   $text - text to scan; returned unchanged when it contains no "RFC "
# Works like magicISBN(): split on "RFC ", skip leading blanks, consume the
# digits listed in $valid. The link target is the "rfcurl" message with $1
# replaced by the number, and the link attributes come from the skin.
# NOTE(review): the loop bodies that accumulate $blank and $rfc, the test
# choosing between the plain-text and link branches, and the return are
# missing from this listing.
# NOTE(review): split() and the $x{0} offset syntax have been removed from
# current PHP versions; this code targets an old runtime.
1836 /* private */ function magicRFC( $text )
1840 $a = split( "RFC ", " $text" );
1841 if ( count ( $a ) < 2 ) return $text;
# Drop the space that was prepended for the split
1842 $text = substr( array_shift( $a ), 1);
1843 $valid = "0123456789";
1845 foreach ( $a as $x ) {
1846 $rfc = $blank = "" ;
1847 while ( " " == $x{0} ) {
1849 $x = substr( $x, 1 );
1851 while ( strstr( $valid, $x{0} ) != false ) {
1853 $x = substr( $x, 1 );
1857 $text .= "RFC $blank$x";
1859 $url = wfmsg( "rfcurl" );
1860 $url = str_replace( "$1", $rfc, $url);
1861 $sk =& $this->mOptions
->getSkin();
1862 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1863 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
# Transform wiki text before it is saved.
#   $text       - submitted wiki text
#   $title      - title of the page, stored by reference in $this->mTitle
#   $user       - user performing the save, passed on to pstPass2()
#   $options    - stored in $this->mOptions
#   $clearState - when true (the default) clearState() is called first
# The output type is set to OT_WIKI. After two literal / regex replacement
# tables have been applied, the text is stripped, run through pstPass2() and
# unstripped again.
# NOTE(review): the lines that open the two $pairs arrays and the return
# statement are missing from this listing.
1869 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1871 $this->mOptions
= $options;
1872 $this->mTitle
=& $title;
1873 $this->mOutputType
= OT_WIKI
;
1875 if ( $clearState ) {
1876 $this->clearState();
1879 $stripState = false;
1883 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
# Regex table: normalise <br> variants
1887 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1888 "/<br *?>/i" => "<br />",
1890 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1892 $text = $this->strip( $text, $stripState, false );
1893 $text = $this->pstPass2( $text, $user );
1894 $text = $this->unstrip( $text, $stripState );
# Second pass of the pre-save transform.
#   $text - wiki text, already stripped by the caller in this file
#   $user - supplies the user name and the "nickname" option for signatures
# Visible steps: replaceVariables(), signature expansion (five, four and
# three tildes, longest first so the shorter patterns cannot consume part of
# a longer one), pipe-trick expansion of links such as [[name (context)|]],
# SUBST substitution through a forked parser, and trimming of trailing
# whitespace with MAG_END removal.
# NOTE(review): several lines, including the assignment of $context and the
# return statement, are missing from this listing.
1898 /* private */ function pstPass2( $text, &$user )
1900 global $wgLang, $wgLocaltimezone, $wgCurParser;
1902 # Variable replacement
1903 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1904 $text = $this->replaceVariables( $text );
1908 $n = $user->getName();
1909 $k = $user->getOption( "nickname" );
# An empty nickname falls back to the user name
1910 if ( "" == $k ) { $k = $n; }
# Temporarily switch the TZ environment variable while the timestamp is formatted
1911 if(isset($wgLocaltimezone)) {
1912 $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1914 /* Note: this is an ugly timezone hack for the European wikis */
1915 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1916 " (" . date( "T" ) . ")";
1917 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1919 $text = preg_replace( "/~~~~~/", $d, $text );
1920 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1921 Namespace::getUser() ) . ":$n|$k]] $d", $text );
1922 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1923 Namespace::getUser() ) . ":$n|$k]]", $text );
1925 # Context links: [[|name]] and [[name (context)|]]
1927 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1928 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1929 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1930 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1932 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
1933 $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
1934 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
1935 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1936 # [[ns:page (cont)|]]
1938 $t = $this->mTitle
->getText();
1939 if ( preg_match( $conpat, $t, $m ) ) {
1942 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1943 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1944 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1946 if ( "" == $context ) {
1947 $text = preg_replace( $p2, "[[\\1]]", $text );
1949 $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
# SUBST handling runs on a forked parser whose result is merged back afterwards
1953 $mw =& MagicWord::get( MAG_SUBST );
1954 $wgCurParser = $this->fork();
1955 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1956 $this->merge( $wgCurParser );
1959 # Trim trailing whitespace
1960 # MAG_END (__END__) tag allows for trailing
1961 # whitespace to be deliberately included
1962 $text = rtrim( $text );
1963 $mw =& MagicWord
::get( MAG_END
);
1964 $mw->matchAndRemove( $text );
# Prime the parser for outside code that calls class members without going
# through parse(): the title (kept by reference), the options and the output
# type are stored on the parser, and the per-parse state is reset unless
# $clearState is false.
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
# Expand variables and templates inside an interface message.
#   $text    - message text
#   $options - stored in $this->mOptions
# The parser is pointed at $wgTitle, switched to output type OT_MSG and
# reset with clearState() before replaceVariables() is run on the text.
# A static $executing flag exists to stop re-entrant calls.
# NOTE(review): the body of the recursion guard, the global declaration for
# $wgTitle and the return statement are missing from this listing.
1981 function transformMsg( $text, $options ) {
1983 static $executing = false;
1985 # Guard against infinite recursion
1991 $this->mTitle
= $wgTitle;
1992 $this->mOptions
= $options;
1993 $this->mOutputType
= OT_MSG
;
1994 $this->clearState();
1995 $text = $this->replaceVariables( $text );
# Result fields of a parse: output text, language links, category links and
# the old-magic flag.
2004 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
2005 var $mCacheTime; # Used in ParserCache
# Constructor: store the parse result.
#   $text             - output text (default "")
#   $languageLinks    - language links (default empty array)
#   $categoryLinks    - category links (default empty array)
#   $containsOldMagic - old-magic flag (default false)
# The cache time always starts out as an empty string.
function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
    $containsOldMagic = false )
{
    $this->mCacheTime = "";

    $this->mContainsOldMagic = $containsOldMagic;
    $this->mCategoryLinks = $categoryLinks;
    $this->mLanguageLinks = $languageLinks;
    $this->mText = $text;
}
# Read accessors; each returns the field of the same name.

# Output text
function getText()
{
    return $this->mText;
}

# Language links
function getLanguageLinks()
{
    return $this->mLanguageLinks;
}

# Category links
function getCategoryLinks()
{
    return $this->mCategoryLinks;
}

# Cache time
function getCacheTime()
{
    return $this->mCacheTime;
}

# Old-magic flag
function containsOldMagic()
{
    return $this->mContainsOldMagic;
}
# Write accessors; each passes its field and the new value to wfSetVar()
# and returns whatever wfSetVar() returns.

# Output text
function setText( $text )
{
    $result = wfSetVar( $this->mText, $text );
    return $result;
}

# Language links
function setLanguageLinks( $ll )
{
    $result = wfSetVar( $this->mLanguageLinks, $ll );
    return $result;
}

# Category links
function setCategoryLinks( $cl )
{
    $result = wfSetVar( $this->mCategoryLinks, $cl );
    return $result;
}

# Old-magic flag
function setContainsOldMagic( $com )
{
    $result = wfSetVar( $this->mContainsOldMagic, $com );
    return $result;
}

# Cache time
function setCacheTime( $t )
{
    $result = wfSetVar( $this->mCacheTime, $t );
    return $result;
}
# Fold the data collected by another output object into this one.
#   $other - object exposing mLanguageLinks, mCategoryLinks and
#            mContainsOldMagic
# Language links and category links of $other are appended to this object's
# own lists, and the old-magic flag becomes true when either side had it set.
# Fix: category links used to be merged with this object's own language
# links, which leaked language links into the category list and dropped the
# categories of $other.
function merge( $other ) {
    $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
    $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
    $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
}
2038 # All variables are private
# Each option field below has a matching get*/set* accessor; initialiseFromUser()
# fills them from site globals and user options.
2039 var $mUseTeX; # Use texvc to expand <math> tags
2040 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
2041 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2042 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2043 var $mAllowExternalImages; # Allow external images inline
2044 var $mSkin; # Reference to the preferred skin
2045 var $mDateFormat; # Date format index
2046 var $mEditSection; # Create "edit section" links
2047 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2048 var $mNumberHeadings; # Automatically number headings
2049 var $mShowToc; # Show table of contents
# Read accessors; each returns the option field of the same name.

function getUseTeX()
{
    return $this->mUseTeX;
}

function getUseCategoryMagic()
{
    return $this->mUseCategoryMagic;
}

function getUseDynamicDates()
{
    return $this->mUseDynamicDates;
}

function getInterwikiMagic()
{
    return $this->mInterwikiMagic;
}

function getAllowExternalImages()
{
    return $this->mAllowExternalImages;
}

function getSkin()
{
    return $this->mSkin;
}

function getDateFormat()
{
    return $this->mDateFormat;
}

function getEditSection()
{
    return $this->mEditSection;
}

function getEditSectionOnRightClick()
{
    return $this->mEditSectionOnRightClick;
}

function getNumberHeadings()
{
    return $this->mNumberHeadings;
}

function getShowToc()
{
    return $this->mShowToc;
}
# Write accessors; each passes its option field and the new value to
# wfSetVar() (wfSetRef() for the skin) and returns that helper's result.

function setUseTeX( $x )
{
    $result = wfSetVar( $this->mUseTeX, $x );
    return $result;
}

function setUseCategoryMagic( $x )
{
    $result = wfSetVar( $this->mUseCategoryMagic, $x );
    return $result;
}

function setUseDynamicDates( $x )
{
    $result = wfSetVar( $this->mUseDynamicDates, $x );
    return $result;
}

function setInterwikiMagic( $x )
{
    $result = wfSetVar( $this->mInterwikiMagic, $x );
    return $result;
}

function setAllowExternalImages( $x )
{
    $result = wfSetVar( $this->mAllowExternalImages, $x );
    return $result;
}

# The skin is the one option stored through wfSetRef()
function setSkin( $x )
{
    $result = wfSetRef( $this->mSkin, $x );
    return $result;
}

function setDateFormat( $x )
{
    $result = wfSetVar( $this->mDateFormat, $x );
    return $result;
}

function setEditSection( $x )
{
    $result = wfSetVar( $this->mEditSection, $x );
    return $result;
}

function setEditSectionOnRightClick( $x )
{
    $result = wfSetVar( $this->mEditSectionOnRightClick, $x );
    return $result;
}

function setNumberHeadings( $x )
{
    $result = wfSetVar( $this->mNumberHeadings, $x );
    return $result;
}

function setShowToc( $x )
{
    $result = wfSetVar( $this->mShowToc, $x );
    return $result;
}
# Factory: create a ParserOptions object and initialise it from $user via
# initialiseFromUser().
# NOTE(review): the return statement is missing from this listing;
# presumably the new object is returned.
2075 /* static */ function newFromUser( &$user )
2077 $popts = new ParserOptions
;
2078 $popts->initialiseFromUser( $user );
# Fill every option field.
#   $userInput - user whose preferences are read; when it is empty a
#                substitute $user is used instead
# Site-wide switches are copied from the $wg* globals; the skin and the
# "date", "editsection", "editsectiononrightclick", "numberheadings" and
# "showtoc" preferences are read from the user.
# NOTE(review): the line that creates the substitute user is missing from
# this listing; only the setLoaded( true ) call on it is visible.
2082 function initialiseFromUser( &$userInput )
2084 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2086 if ( !$userInput ) {
2088 $user->setLoaded( true );
2090 $user =& $userInput;
2093 $this->mUseTeX
= $wgUseTeX;
2094 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
2095 $this->mUseDynamicDates
= $wgUseDynamicDates;
2096 $this->mInterwikiMagic
= $wgInterwikiMagic;
2097 $this->mAllowExternalImages
= $wgAllowExternalImages;
2098 $this->mSkin
=& $user->getSkin();
2099 $this->mDateFormat
= $user->getOption( "date" );
2100 $this->mEditSection
= $user->getOption( "editsection" );
2101 $this->mEditSectionOnRightClick
= $user->getOption( "editsectiononrightclick" );
2102 $this->mNumberHeadings
= $user->getOption( "numberheadings" );
2103 $this->mShowToc
= $user->getOption( "showtoc" );
# Regex callbacks, used in Parser::replaceVariables
# Template pass: forward the match array to the parser held in the global
# $wgCurParser and hand its result back to preg_replace_callback().
function wfBraceSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->braceSubstitution( $matches );
    return $replacement;
}
# Argument pass: forward the match array to the parser held in the global
# $wgCurParser and hand its result back to preg_replace_callback().
function wfArgSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->argSubstitution( $matches );
    return $replacement;
}
# Variable pass: forward the match array to the parser held in the global
# $wgCurParser and hand its result back to preg_replace_callback().
function wfVariableSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->variableSubstitution( $matches );
    return $replacement;
}