3 // require_once('Tokenizer.php');
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
14 # Processes wiki markup
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
28 # * only within ParserOptions
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
# Cap on how many times any single page may be included; this is the
# countermeasure for the O(N^2) inclusion attack described in the note above.
40 define( "MAX_INCLUDE_REPEAT", 5 );
42 # Allowed values for $mOutputType
# (parse() sets OT_HTML; strip() renders tag content only for OT_HTML.)
43 define( "OT_HTML", 1 );
44 define( "OT_WIKI", 2 );
45 define( "OT_MSG", 3 );
47 # String parameter for extractTags() which will cause it
48 # to strip HTML comments in addition to regular
49 # <XML>-style tags. This should not be anything we
50 # may want to use in wikisyntax.
51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
53 # Prefix for the unique strip markers; used by strip(), insertStripItem()
# and (for spotting <pre> markers) doBlockLevels().
54 define( "UNIQ_PREFIX", "NaodW29");
# Per-parse working state of the parser.
58 # Cleared with clearState():
# Note: the "= array()" initialiser applies to $mStripState only.
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# Assigned at the start of parse(): the ParserOptions, the Title (by
# reference) and one of the OT_* output-type constants.
63 var $mOptions, $mTitle, $mOutputType;
# Resets the per-parse state: a fresh ParserOutput, the external-link
# autonumber counter back to 0, no open section or <dt>, and empty
# include-count, strip-state and argument-stack arrays.
# NOTE(review): the enclosing function header is not visible here; presumably
# this is the body of clearState() -- confirm against the full file.
# NOTE(review): $mInPre is listed among the cleared properties but is not
# reset on any line visible here -- confirm.
72 $this->mOutput
= new ParserOutput
;
73 $this->mAutonumber
= 0;
74 $this->mLastSection
= "";
75 $this->mDTopen
= false;
# false means "not yet built"; initialiseVariables() replaces it with an array.
76 $this->mVariables
= false;
77 $this->mIncludeCount
= array();
78 $this->mStripState
= array();
79 $this->mArgStack
= array();
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
85 # Returns a ParserOutput
#
# Visible steps: strip() -> internalParse() -> unstrip() -> tag fix-ups via
# preg_replace() -> doBlockLevels() -> mOutput->setText(). A tidy() call is
# also visible; presumably it is conditional on $wgUseTidy (see the comment
# on tidy()) -- confirm.
# $title is received by reference and stored by reference in $this->mTitle.
# NOTE(review): $clearState is not read on any line visible here; presumably
# it gates a clearState() call -- confirm.
87 function parse( $text, &$title, $options, $linestart = true, $clearState = true )
90 $fname = "Parser::parse";
91 wfProfileIn( $fname );
97 $this->mOptions
= $options;
98 $this->mTitle
=& $title;
99 $this->mOutputType
= OT_HTML
;
# Protected sections are swapped for markers before the real parse and
# swapped back afterwards, using the shared $this->mStripState.
102 $text = $this->strip( $text, $this->mStripState
);
103 $text = $this->internalParse( $text, $linestart );
104 $text = $this->unstrip( $text, $this->mStripState
);
105 # Clean up special characters, only run once, next-to-last before doBlockLevels
# First fix-up table: self-close <hr>/<br>, turn <center> into a div, and
# handle stray ampersands.
108 "/<hr *>/i" => '<hr/>',
109 "/<br *>/i" => '<br/>',
110 "/<center *>/i"=>'<div class="center">',
111 "/<\\/center *>/i" => '</div>',
112 # Clean up spare ampersands; note that we probably ought to be
113 # more careful about named entities.
# NOTE(review): the replacement below is a bare '&', which makes this rule a
# no-op; presumably '&amp;' was intended (possibly lost to entity decoding).
# Also "(?!:amp;" carries a stray ':' and "A-fa-f" is a wider range than
# hex digits -- confirm against the canonical file.
114 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
116 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
# Second, shorter fix-up table (only the <center> rewrite); presumably the
# alternative branch of a condition that is not visible here.
119 "/<center *>/i"=>'<div class="center">',
120 "/<\\/center *>/i" => '</div>'
122 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
125 $text = $this->doBlockLevels( $text, $linestart );
127 $text = $this->tidy($text);
129 $this->mOutput
->setText( $text );
130 wfProfileOut( $fname );
131 return $this->mOutput
;
# Returns a random lowercase-hex string built from two mt_rand() draws in
# [0, 0x7fffffff]; used in this file to make strip markers unique.
# mt_rand() is not cryptographically secure, so the markers are unpredictable
# only in the casual sense.
134 /* static */ function getRandomString()
136 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
139 # Replaces all occurrences of <$tag>content</$tag> in the text
140 # with a random marker and returns the new text. The output parameter
141 # $content will be an associative array filled with data of the form
142 # $unique_marker => content.
144 # If $content is already set, the additional entries will be appended.
146 # If $tag is set to STRIP_COMMENTS, the function will extract
147 # <!-- HTML comments -->
#
# Each marker is "$uniq_prefix-$tag<random hex>" followed by an 8-digit
# uppercase hex counter.
# NOTE(review): $tag is interpolated into the regexes without preg_quote();
# the callers visible in this file only pass literal tag names.
# NOTE(review): the initialisation of $n and $stripped is not visible here.
149 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
150 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
# Consume $text piece by piece: split at the next opening tag, then split
# the remainder at the matching closing tag.
157 while ( "" != $text ) {
158 if($tag==STRIP_COMMENTS
) {
159 $p = preg_split( "/<!--/i", $text, 2 );
161 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
# No opening tag found, or nothing follows it.
164 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) {
167 if($tag==STRIP_COMMENTS
) {
168 $q = preg_split( "/-->/i", $p[1], 2 );
170 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
# Record the tag body under a fresh marker and emit the marker in its place.
172 $marker = $rnd . sprintf("%08X", $n++
);
173 $content[$marker] = $q[0];
174 $stripped .= $marker;
181 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
# (the visible code also handles <timeline> and HTML comments).
182 # If $render is set, performs necessary rendering operations on plugins
183 # Returns the text, and fills an array with data needed in unstrip()
184 # If the $state is already a valid strip state, it adds to the state
186 # When $stripcomments is set, HTML comments <!-- like this -->
187 # will be stripped in addition to other tags. This is important
188 # for section editing, where these comments cause confusion when
189 # counting the sections in the wikisource
# NOTE(review): no visible line reads $stripcomments; presumably a hidden
# condition guards the comment extraction below -- confirm.
190 function strip( $text, &$state, $stripcomments = false )
# Rendering (as opposed to round-tripping the original markup) happens only
# when producing HTML.
192 $render = ($this->mOutputType
== OT_HTML
);
193 $nowiki_content = array();
194 $hiero_content = array();
195 $timeline_content = array();
196 $math_content = array();
197 $pre_content = array();
198 $comment_content = array();
200 # Replace any instances of the placeholders
201 $uniq_prefix = UNIQ_PREFIX
;
202 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
# Each section below follows the same shape: pull the tag out into markers,
# then store either a rendered form or the original markup per marker.
204 $text = Parser
::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
205 foreach( $nowiki_content as $marker => $content ){
207 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
209 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
213 $text = Parser
::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
214 foreach( $hiero_content as $marker => $content ){
215 if( $render && $GLOBALS['wgUseWikiHiero']){
216 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML
);
218 $hiero_content[$marker] = "<hiero>$content</hiero>";
222 $text = Parser
::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
223 foreach( $timeline_content as $marker => $content ){
224 if( $render && $GLOBALS['wgUseTimeline']){
225 $timeline_content[$marker] = renderTimeline( $content );
227 $timeline_content[$marker] = "<timeline>$content</timeline>";
231 $text = Parser
::extractTags("math", $text, $math_content, $uniq_prefix);
232 foreach( $math_content as $marker => $content ){
234 if( $this->mOptions
->getUseTeX() ) {
235 $math_content[$marker] = renderMath( $content );
# NOTE(review): the second tag below is "<math>" rather than "</math>", so the
# element is never closed; the form two lines down closes it correctly.
# Presumably a typo -- confirm (and check whether these were meant to be
# entity-escaped).
237 $math_content[$marker] = "<math>$content<math>";
240 $math_content[$marker] = "<math>$content</math>";
244 $text = Parser
::extractTags("pre", $text, $pre_content, $uniq_prefix);
245 foreach( $pre_content as $marker => $content ){
247 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
249 $pre_content[$marker] = "<pre>$content</pre>";
253 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
254 foreach( $comment_content as $marker => $content ){
255 $comment_content[$marker] = "<!--$content-->";
259 # Merge state with the pre-existing state, if there is one
# Array "+" is a union: on a key clash the entry already in $state wins.
261 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
262 $state['hiero'] = $state['hiero'] +
$hiero_content;
263 $state['timeline'] = $state['timeline'] +
$timeline_content;
264 $state['math'] = $state['math'] +
$math_content;
265 $state['pre'] = $state['pre'] +
$pre_content;
266 $state['comment'] = $state['comment'] +
$comment_content;
# Otherwise the state is built from scratch with the same six keys.
269 'nowiki' => $nowiki_content,
270 'hiero' => $hiero_content,
271 'timeline' => $timeline_content,
272 'math' => $math_content,
273 'pre' => $pre_content,
274 'comment' => $comment_content
# Puts stripped content back: every marker recorded in $state is replaced by
# its stored content via str_replace(), walking both the state and each of
# its per-tag dictionaries from last entry to first.
# $state is taken by reference; end()/prev() move its internal pointer.
280 function unstrip( $text, &$state )
282 # Must expand in reverse order, otherwise nested tags will be corrupted
# NOTE(review): this first end() is redundant; the for-initialiser repeats it.
283 $contentDict = end( $state );
# NOTE(review): "!== false" is the stop condition, so an entry whose value is
# exactly false would end the walk early.
284 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
285 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
286 $text = str_replace( key( $contentDict ), $content, $text );
293 # Add an item to the strip state
294 # Returns the unique tag which must be inserted into the stripped text
295 # The tag will be replaced with the original text in unstrip()
#
# The item is stored under $state['item'], keyed by a marker of the form
# UNIQ_PREFIX . '-item' . <random hex>.
297 function insertStripItem( $text, &$state )
299 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
308 $state['item'][$rnd] = $text;
312 # This method generates the list of subcategories and pages for a category
#
# Builds an HTML fragment in $r: a clearing <br>, then a "subcategories"
# heading with links to member categories, then a "category_header" heading
# with links to the remaining member pages.
# NOTE(review): the first guard returns with no value (null) while the second
# returns ""; internalParse() appends the result to a string, so both behave
# as "nothing" there.
# NOTE(review): this reads $wgUser for the skin, although the file header
# asks to keep $wgUser out of the parser.
313 function categoryMagic ()
315 global $wgLang , $wgUser ;
316 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
318 $cns = Namespace::getCategory() ;
319 if ( $this->mTitle
->getNamespace() != $cns ) return "" ; # This isn't a category page
321 $r = "<br style=\"clear:both;\"/>\n";
324 $sk =& $wgUser->getSkin() ;
326 $articles = array() ;
327 $children = array() ;
329 $id = $this->mTitle
->getArticleID() ;
# The DB key is passed through wfStrencode() before being interpolated into
# the SQL string.
332 $t = wfStrencode( $this->mTitle
->getDBKey() );
333 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
334 $res = wfQuery ( $sql, DB_READ
) ;
# NOTE(review): the initialisation of $data is not visible here; if the query
# yields no rows and $data is unset, the foreach below would be handed an
# undefined variable -- confirm.
335 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
337 # For all pages that link to this category
338 foreach ( $data AS $x )
# Rebuild the prefixed title: "<namespace text>:<title>", or just the title
# when the namespace text is empty.
340 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
341 if ( $t != "" ) $t .= ":" ;
342 $t .= $x->cur_title
;
344 if ( $x->cur_namespace
== $cns ) {
345 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
347 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
350 wfFreeResult ( $res ) ;
352 # Showing subcategories
353 if ( count ( $children ) > 0 ) {
354 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
355 $r .= implode ( ", " , $children ) ;
358 # Showing pages in this category
359 if ( count ( $articles ) > 0 ) {
360 $ti = $this->mTitle
->getText() ;
361 $h = wfMsg( "category_header", $ti );
362 $r .= "<h2>{$h}</h2>\n" ;
363 $r .= implode ( ", " , $articles ) ;
# Builds the whitelist of HTML attribute names that fixTagAttributes() lets
# through; anything not listed here is dropped from user-supplied tags.
# NOTE(review): "width" appears twice in the list (first and fifth rows);
# harmless for in_array() lookups but redundant.
370 function getHTMLattrs ()
372 $htmlattrs = array( # Allowed attributes--no scripting, etc.
373 "title", "align", "lang", "dir", "width", "height",
374 "bgcolor", "clear", /* BR */ "noshade", /* HR */
375 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
376 /* FONT */ "type", "start", "value", "compact",
377 /* For various lists, mostly deprecated but safe */
378 "summary", "width", "border", "frame", "rules",
379 "cellspacing", "cellpadding", "valign", "char",
380 "charoff", "colgroup", "col", "span", "abbr", "axis",
381 "headers", "scope", "rowspan", "colspan", /* Tables */
382 "id", "class", "name", "style" /* For CSS */
# Sanitises the attribute portion ($t) of an HTML/table tag: attributes whose
# lowercased name is not in getHTMLattrs() are removed, and a style attribute
# that looks dangerous is checked for separately.
# Returns "" immediately for blank input.
387 function fixTagAttributes ( $t )
389 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
390 $htmlattrs = $this->getHTMLattrs() ;
392 # Strip non-approved attributes from the tag
# The pattern captures name ($1) and optional value ($3, bare or
# double-quoted); the replacement is PHP code evaluated per match.
# NOTE(review): this relies on the preg "/e" (eval) modifier, which current
# PHP versions no longer support; a preg_replace_callback() port would be
# needed there -- confirm the target PHP version.
394 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
395 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
397 # Strip javascript "expression" from stylesheets. Brute force approach:
398 # If anything offensive is found, all attributes of the HTML tag are dropped
# "Offensive" here means a style attribute containing "expression", "url(" or
# a "tp://" / "tps://"-style scheme (the pattern is "tps*:\/\/").
401 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
402 wfMungeToUtf8( $t ) ) )
410 /* interface with html tidy, used if $wgUseTidy = true */
# Wraps $text in a minimal XHTML document, pipes it to the external tidy
# binary through proc_open() and collects tidy's stdout in $cleansource.
# If tidy produced nothing for non-empty input, an error heading plus the
# escaped input is returned instead.
411 function tidy ( $text ) {
412 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
413 global $wgInputEncoding, $wgOutputEncoding;
# Choose tidy's encoding flag from the output encoding.
# NOTE(review): ".=" appends to the *global* $wgTidyOpts, so the flag
# accumulates if tidy() runs more than once per request -- confirm intent.
415 switch(strtoupper($wgOutputEncoding)) {
417 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
420 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
423 $wgTidyOpts .= ' -raw';
426 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
427 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
428 '<head><title>test</title></head><body>'.$text.'</body></html>';
# stdin and stdout are pipes; stderr is discarded to /dev/null.
429 $descriptorspec = array(
430 0 => array("pipe", "r"),
431 1 => array("pipe", "w"),
432 2 => array("file", "/dev/null", "a")
# NOTE(review): the command line is assembled from configuration globals
# without escapeshellarg(); fine only while those are trusted site settings.
434 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
435 if (is_resource($process)) {
436 fwrite($pipes[0], $text);
# NOTE(review): the initialisation of $cleansource and the closing of the
# pipes are not visible here -- confirm they exist in the full file.
438 while (!feof($pipes[1])) {
439 $cleansource .= fgets($pipes[1], 1024);
442 $return_value = proc_close($process);
444 if( $cleansource == '' && $text != '') {
445 return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
# Converts wiki table markup to HTML, one input line at a time:
#   {|  opens a table        |}  closes it        |-  starts a new row
#   |   data cell(s)         !   header cell(s)   |+  caption
# Four parallel stacks ($td, $ltd, $tr, $ltr) hold the state of each nested
# table; one entry per stack is pushed when a table opens.
451 function doTableStuff ( $t )
453 $t = explode ( "\n" , $t ) ;
454 $td = array () ; # Is currently a td tag open?
455 $ltd = array () ; # Was it TD or TH?
456 $tr = array () ; # Is currently a tr tag open?
457 $ltr = array () ; # tr attributes
458 foreach ( $t AS $k => $x )
461 $fc = substr ( $x , 0 , 1 ) ;
462 if ( "{|" == substr ( $x , 0 , 2 ) )
# NOTE(review): substr($x, 3) skips the character right after "{|" as well;
# presumably a separating space is expected there -- confirm.
464 $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
465 array_push ( $td , false ) ;
466 array_push ( $ltd , "" ) ;
467 array_push ( $tr , false ) ;
468 array_push ( $ltr , "" ) ;
# Outside any table the remaining branches are skipped.
470 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
471 else if ( "|}" == substr ( $x , 0 , 2 ) )
# Close whatever cell/row is still open, innermost first.
474 $l = array_pop ( $ltd ) ;
475 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
476 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
480 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
482 $z = trim ( substr ( $x , 2 ) ) ;
483 $t[$k] = "<caption>{$z}</caption>\n" ;
485 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
# Drop the leading "|" and every following "-"; the rest is the row's
# attribute text.
487 $x = substr ( $x , 1 ) ;
488 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
490 $l = array_pop ( $ltd ) ;
491 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
492 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
# The <tr> itself is emitted lazily by the first cell of the row; only its
# sanitised attributes are remembered here.
495 array_push ( $tr , false ) ;
496 array_push ( $td , false ) ;
497 array_push ( $ltd , "" ) ;
498 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
500 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
502 if ( "|+" == substr ( $x , 0 , 2 ) )
505 $x = substr ( $x , 1 ) ;
# Several cells may share one line, separated by "||" (or "!!" on header
# lines, normalised to "||" first).
507 $after = substr ( $x , 1 ) ;
508 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
509 $after = explode ( "||" , $after ) ;
511 foreach ( $after AS $theline )
# Open the row if it is not open yet, using the attributes saved by "|-".
516 $tra = array_pop ( $ltr ) ;
517 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
518 array_push ( $tr , true ) ;
519 array_push ( $ltr , "" ) ;
# Close the previous cell, then pick the element for this one.
522 $l = array_pop ( $ltd ) ;
523 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
524 if ( $fc == "|" ) $l = "td" ;
525 else if ( $fc == "!" ) $l = "th" ;
526 else if ( $fc == "+" ) $l = "caption" ;
528 array_push ( $ltd , $l ) ;
# "attributes|content": a single "|" inside the cell separates attributes
# from the cell text.
529 $y = explode ( "|" , $theline , 2 ) ;
530 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
# NOTE(review): "$y = $y = ..." is a doubled assignment; harmless, but one
# "$y =" is presumably a typo.
531 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
533 array_push ( $td , true ) ;
538 # Closing open td, tr && table
# NOTE(review): this always emits "</td>" and does not consult $ltd, so a
# trailing open <th> or <caption> would get the wrong end tag -- confirm.
539 while ( count ( $td ) > 0 )
541 if ( array_pop ( $td ) ) $t[] = "</td>" ;
542 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
546 $t = implode ( "\n" , $t ) ;
547 # $t = $this->removeHTMLtags( $t );
551 # Parses the text and adds the result to the strip state
552 # Returns the strip tag
#
# The text goes through strip() and a non-main internalParse() (the fourth
# argument is false), is stored via insertStripItem(), and the resulting
# marker is returned so the caller can splice it in.
553 function stripParse( $text, $linestart, $args )
555 $text = $this->strip( $text, $this->mStripState
);
556 $text = $this->internalParse( $text, $linestart, $args, false );
# NOTE(review): the condition guarding this newline prefix is not visible;
# presumably it depends on $linestart -- confirm.
558 $text = "\n" . $text;
560 return $this->insertStripItem( $text, $this->mStripState
);
# Core wikitext-to-HTML pass. Order matters; the visible sequence is:
# removeHTMLtags -> replaceVariables -> horizontal rules -> headings ->
# (optional) dynamic dates -> quotes -> external links -> internal links ->
# tables -> ISBN/RFC magic -> heading formatting -> skin transformContent.
# $args is forwarded to replaceVariables(); $isMain is forwarded to
# formatHeadings().
563 function internalParse( $text, $linestart, $args = array(), $isMain=true )
565 $fname = "Parser::internalParse";
566 wfProfileIn( $fname );
568 $text = $this->removeHTMLtags( $text );
569 $text = $this->replaceVariables( $text, $args );
# A run of four or more dashes at the start of a line becomes <hr/>.
571 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
573 $text = $this->doHeadings( $text );
574 if($this->mOptions
->getUseDynamicDates()) {
575 global $wgDateFormatter;
576 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
578 $text = $this->doAllQuotes( $text );
# External links are replaced before internal ones (see the note on
# replaceExternalLinks()).
579 $text = $this->replaceExternalLinks( $text );
580 $text = $this->replaceInternalLinks ( $text );
581 //$text = $this->doTokenizedParser ( $text );
582 $text = $this->doTableStuff ( $text ) ;
583 $text = $this->magicISBN( $text );
584 $text = $this->magicRFC( $text );
585 $text = $this->formatHeadings( $text, $isMain );
586 $sk =& $this->mOptions
->getSkin();
587 $text = $sk->transformContent( $text );
# Append the category listing once per parser object.
# NOTE(review): categoryMagicDone is created ad hoc (it is not in the "var"
# list) and is not reset on any visible state-clearing line, so a reused
# Parser instance would append the listing only on its first parse -- confirm.
589 if ( !isset ( $this->categoryMagicDone
) ) {
590 $text .= $this->categoryMagic () ;
591 $this->categoryMagicDone
= true ;
594 wfProfileOut( $fname );
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are tried from 6 down to 1 so that longer runs of "=" are consumed
# before shorter ones can match inside them. The pattern is anchored per line
# (the "m" flag) and keeps the trailing whitespace character it matched.
599 /* private */ function doHeadings( $text )
601 for ( $i = 6; $i >= 1; --$i ) {
602 $h = substr( "======", 0, $i );
603 $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
604 "<h{$i}>\\1</h{$i}>\\2", $text );
# Applies doQuotes() (''italic'' / '''bold''' handling) to each line and
# rebuilds the text in $outtext.
# NOTE(review): lines are split and re-joined on "\r\n" here, whereas
# doTableStuff() and doBlockLevels() split on "\n"; with "\n"-only input the
# whole text is treated as one line and gains a trailing "\r\n" -- confirm.
# NOTE(review): the initialisation of $outtext is not visible here.
609 /* private */ function doAllQuotes( $text )
612 $lines = explode( "\r\n", $text );
613 foreach ( $lines as $line ) {
614 $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
# Recursive converter for apostrophe markup inside one piece of text.
#   $pre  - already-processed text held back until the enclosing span closes
#   $text - remaining unprocessed text
#   $mode - current state: "", "em", "strong", "emstrong", "strongem", "both"
# Each call finds the first '' in $text, decides whether it is an em or a
# strong delimiter, and recurses on the remainder with the new mode.
# When no '' is left, the remaining text is wrapped according to $mode.
619 /* private */ function doQuotes( $pre, $text, $mode )
# Ungreedy ("U") match: $m[1] is the text before the first '', $m[2] the rest.
621 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
622 $m1_strong = ($m[1] == "") ?
"" : "<strong>{$m[1]}</strong>";
623 $m1_em = ($m[1] == "") ?
"" : "<em>{$m[1]}</em>";
# A third apostrophe right after the pair makes this a strong delimiter.
624 if ( substr ($m[2], 0, 1) == "'" ) {
625 $m[2] = substr ($m[2], 1);
627 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "emstrong" );
628 } else if ($mode == "strong") {
629 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
630 } else if (($mode == "emstrong") ||
($mode == "both")) {
631 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
632 } else if ($mode == "strongem") {
633 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
635 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
# The branches below mirror the ones above with the em/strong roles swapped.
638 if ($mode == "strong") {
639 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "strongem" );
640 } else if ($mode == "em") {
641 return $m1_em . $this->doQuotes ( "", $m[2], "" );
642 } else if ($mode == "emstrong") {
643 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
644 } else if (($mode == "strongem") ||
($mode == "both")) {
645 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
647 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
# No more '' in $text: wrap the remainder according to the open mode.
651 $text_strong = ($text == "") ?
"" : "<strong>{$text}</strong>";
652 $text_em = ($text == "") ?
"" : "<em>{$text}</em>";
655 } else if ($mode == "em") {
656 return $pre . $text_em;
657 } else if ($mode == "strong") {
658 return $pre . $text_strong;
659 } else if ($mode == "strongem") {
660 return (($pre == "") && ($text == "")) ?
"" : "<strong>{$pre}{$text_em}</strong>";
662 return (($pre == "") && ($text == "")) ?
"" : "<em>{$pre}{$text_strong}</em>";
667 # Note: we have to do external links before the internal ones,
668 # and otherwise take great care in the order of things here, so
669 # that we don't end up interpreting some URLs twice.
#
# Runs subReplaceExternalLinks() once per supported URL scheme. The third
# argument ($autonumber) is true for http and https only.
671 /* private */ function replaceExternalLinks( $text )
673 $fname = "Parser::replaceExternalLinks";
674 wfProfileIn( $fname );
675 $text = $this->subReplaceExternalLinks( $text, "http", true );
676 $text = $this->subReplaceExternalLinks( $text, "https", true );
677 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
678 $text = $this->subReplaceExternalLinks( $text, "irc", false );
679 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
680 $text = $this->subReplaceExternalLinks( $text, "news", false );
681 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
682 wfProfileOut( $fname );
# Replaces external links for one URL scheme ($protocol) in $s.
# Two passes are visible:
#   1. bare URLs ("http://...") become <img> (image extensions, when
#      allowed) or <a> elements;
#   2. bracketed links ("[http://... label]") become <a> elements, numbered
#      through $this->mAutonumber when $autonumber is set and no label is given.
686 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
# Placeholder written instead of the protocol in generated markup so the
# output is not matched again; swapped back by the str_replace() below.
688 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed inside a URL.
689 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
691 # this is the list of separators that should be ignored if they
692 # are the last character of a URL but that should be included
693 # if they occur within the URL, e.g. "go to www.foo.com, where .."
694 # in this case, the last comma should not become part of the URL,
695 # but in "www.foo.com/123,2342,32.htm" it should.
# NOTE(review): the assignment of $sep that this comment introduces is not
# visible here, although $sep is used in the patterns below.
# Characters allowed in a file name (used by the image pattern).
697 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
698 $images = "gif|png|jpg|jpeg";
700 # PLEASE NOTE: The curly braces { } are not part of the regex,
701 # they are interpreted as part of the string (used to tell PHP
702 # that the content of the string should be inserted there).
# $e1: bare URL ending in an image extension; $e2: any other bare URL.
# Both require that the URL is not preceded by "[".
703 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
704 "((?i){$images})([^{$uc}]|$)/";
706 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
707 $sk =& $this->mOptions
->getSkin();
709 if ( $autonumber and $this->mOptions
->getAllowExternalImages() ) { # Use img tags only for HTTP urls
710 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
711 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
713 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
714 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
715 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
717 $s = str_replace( $unique, $protocol, $s );
# Second pass: split on "[protocol:" and handle each bracketed candidate.
# The leading space guarantees a first chunk; it is removed again by substr().
719 $a = explode( "[{$protocol}:", " " . $s );
720 $s = array_shift( $a );
721 $s = substr( $s, 1 );
# $e1: "[url]" with no label; $e2: "[url label]". These reuse the names of
# the bare-URL patterns above.
723 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
724 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
726 foreach ( $a as $line ) {
727 if ( preg_match( $e1, $line, $m ) ) {
728 $link = "{$protocol}:{$m[1]}";
730 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
731 else { $text = wfEscapeHTML( $link ); }
732 } else if ( preg_match( $e2, $line, $m ) ) {
733 $link = "{$protocol}:{$m[1]}";
# Neither form matched: put the text back unchanged.
737 $s .= "[{$protocol}:" . $line;
# NOTE(review): preg_quote() is told the delimiter is "/", but the pattern
# is delimited by "!"; a "!" inside $text would not be escaped -- confirm.
740 if( $link == $text ||
preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
743 # Expand the URL for printable version
744 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
746 $la = $sk->getExternalLinkAttributes( $link, $text );
# NOTE(review): $link goes into a single-quoted href without escaping on this
# line; the URL character class above does not include "'" -- confirm that
# $sep does not either.
747 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Replaces [[internal links]] in $s with skin-generated HTML.
# The text is split on "[[", and each chunk is matched against $e1
# ("target|label]]trail"). Depending on the target's namespace the link is
# rendered as an image, a category entry, a media link, a special-page link,
# a self link or an ordinary link; interwiki language links are collected on
# $this->mOutput instead of being rendered inline.
754 /* private */ function replaceInternalLinks( $s )
756 global $wgLang, $wgLinkCache;
757 global $wgNamespacesWithSubpages, $wgLanguageCode;
# NOTE(review): the profiling name is "replaceInternalLink" (singular), which
# differs from the function name -- presumably a typo.
758 static $fname = "Parser::replaceInternalLink" ;
759 wfProfileIn( $fname );
761 wfProfileIn( "$fname-setup" );
763 # the % is needed to support urlencoded titles as well
764 if ( !$tc ) { $tc = Title
::legalChars() . "#%"; }
765 $sk =& $this->mOptions
->getSkin();
# Leading space guarantees a first chunk before any "[["; removed by substr().
767 $a = explode( "[[", " " . $s );
768 $s = array_shift( $a );
769 $s = substr( $s, 1 );
771 # Match a link having the form [[namespace:link|alternate]]trail
773 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
774 # Match the end of a line for a word that's not followed by whitespace,
775 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
776 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
777 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
778 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
781 # Special and Media are pseudo-namespaces; no pages actually exist in them
# Namespace ids are looked up once and cached in statics.
782 static $image = FALSE;
783 static $special = FALSE;
784 static $media = FALSE;
785 static $category = FALSE;
786 if ( !$image ) { $image = Namespace::getImage(); }
787 if ( !$special ) { $special = Namespace::getSpecial(); }
788 if ( !$media ) { $media = Namespace::getMedia(); }
789 if ( !$category ) { $category = Namespace::getCategory(); }
791 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
# Languages with link-prefix extension pull the word fragment directly before
# "[[" into the link.
793 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
800 wfProfileOut( "$fname-setup" );
802 foreach ( $a as $line ) {
803 $prefix = $new_prefix;
805 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
807 # fix up urlencoded title texts
808 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
810 } else { # Invalid form; output directly
811 $s .= $prefix . "[[" . $line ;
812 wfProfileOut( $fname );
818 :Foobar -- override special treatment of prefix (images, language links)
819 /Foobar -- convert to CurrentPage/Foobar
820 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
822 $c = substr($m[1],0,1);
823 $noforce = ($c != ":");
824 if( $c == "/" ) { # subpage
825 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
826 $m[1]=substr($m[1],1,strlen($m[1])-2);
829 $noslash=substr($m[1],1);
831 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) { # subpages allowed here
832 $link = $this->mTitle
->getPrefixedText(). "/" . trim($noslash);
835 } # this might be changed for ugliness reasons
837 $link = $noslash; # no subpage allowed, use standard link
839 } elseif( $noforce ) { # no subpage
# Leading ":" case: drop the colon from the link target.
842 $link = substr( $m[1], 1 );
844 $wasblank = ( "" == $text );
848 $nt = Title
::newFromText( $link );
# Presumably reached when no Title could be built: emit the raw text.
850 $s .= $prefix . "[[" . $line;
851 wfProfileOut( $fname );
854 $ns = $nt->getNamespace();
855 $iw = $nt->getInterWiki();
# Interwiki prefix that is a known language: record as a language link.
857 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
858 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
859 $s .= $prefix . $trail ;
860 wfProfileOut( $fname );
# NOTE(review): this return sits between the start of the per-link loop and
# later link handling; if it is inside the loop it abandons every remaining
# "[[" chunk -- confirm against the full file.
861 return (trim($s) == '')?
'': $s;
864 if ( $ns == $image ) {
865 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
866 $wgLinkCache->addImageLinkObj( $nt );
867 wfProfileOut( $fname );
869 } else if ( $ns == $category ) {
870 $t = $nt->getText() ;
871 $nnt = Title
::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
873 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
874 $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
875 $wgLinkCache->resume();
# The label after "|" doubles as the sort key; a blank label falls back to
# the current page's prefixed title.
877 $sortkey = $wasblank ?
$this->mTitle
->getPrefixedText() : $text;
878 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
879 $this->mOutput
->mCategoryLinks
[] = $t ;
880 $s .= $prefix . $trail ;
881 wfProfileOut( $fname );
# NOTE(review): "strpos(...) == FALSE" is also true when "#" is at offset 0;
# the strict "=== FALSE" is presumably what was meant -- confirm.
885 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
886 ( strpos( $link, "#" ) == FALSE ) ) {
887 # Self-links are handled specially; generally de-link and change to bold.
888 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
889 wfProfileOut( $fname );
893 if( $ns == $media ) {
894 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
895 $wgLinkCache->addImageLinkObj( $nt );
896 wfProfileOut( $fname );
898 } elseif( $ns == $special ) {
899 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
900 wfProfileOut( $fname );
# Ordinary link: the skin receives the prefix as well as the trail.
903 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
# NOTE(review): wfProfileOut( $fname ) appears in many branches above as well
# as here; whether each is paired with a loop exit cannot be seen from the
# visible lines -- confirm the profiling calls stay balanced.
905 wfProfileOut( $fname );
909 # Some functions here used by doBlockLevels()
#
# Builds the closing tag for the currently open section (whatever element
# name is in $this->mLastSection), then clears the in-<pre> flag and the
# section name.
911 /* private */ function closeParagraph()
914 if ( '' != $this->mLastSection
) {
915 $result = "</" . $this->mLastSection
. ">\n";
917 $this->mInPre
= false;
918 $this->mLastSection
= "";
921 # getCommon() returns the length of the longest common substring
922 # of both arguments, starting at the beginning of both.
# (That is, the length of their common prefix, compared byte by byte.)
# NOTE(review): the $str{$i} curly-brace offset syntax used below is not
# accepted by current PHP versions; $str[$i] is the equivalent -- confirm the
# target PHP version.
924 /* private */ function getCommon( $st1, $st2 )
# Only the length of the shorter string needs to be scanned.
926 $fl = strlen( $st1 );
927 $shorter = strlen( $st2 );
928 if ( $fl < $shorter ) { $shorter = $fl; }
930 for ( $i = 0; $i < $shorter; ++
$i ) {
931 if ( $st1{$i} != $st2{$i} ) { break; }
935 # These next three functions open, continue, and close the list
936 # element appropriate to the prefix character passed into them.
#
# openList(): closes any open paragraph, then opens the list and its first
# item: "*" -> <ul><li>, "#" -> <ol><li>, ":" -> <dl><dd>, ";" -> <dl><dt>
# (which also sets $this->mDTopen).
938 /* private */ function openList( $char )
940 $result = $this->closeParagraph();
942 if ( "*" == $char ) { $result .= "<ul><li>"; }
943 else if ( "#" == $char ) { $result .= "<ol><li>"; }
944 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
945 else if ( ";" == $char ) {
946 $result .= "<dl><dt>";
947 $this->mDTopen
= true;
# NOTE(review): this branch assigns rather than appends, discarding the
# paragraph-closing markup produced above -- confirm whether ".=" was meant.
949 else { $result = "<!-- ERR 1 -->"; }
# Returns the markup that ends the current list item and starts the next one
# at the same nesting level. For definition lists the closing tag depends on
# whether a <dt> is currently open ($this->mDTopen), and the flag is updated
# to reflect the element being opened.
# NOTE(review): the default value of $close (used when no <dt> is open) is not
# visible here.
954 /* private */ function nextItem( $char )
956 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
957 else if ( ":" == $char ||
";" == $char ) {
959 if ( $this->mDTopen
) { $close = "</dt>"; }
960 if ( ";" == $char ) {
961 $this->mDTopen
= true;
962 return $close . "<dt>";
964 $this->mDTopen
= false;
965 return $close . "<dd>";
# Any other prefix character is reported as an HTML comment.
968 return "<!-- ERR 2 -->";
# Returns the markup that closes the last item and the list itself for the
# given prefix character. For ":" the item tag is </dt> when a <dt> is still
# open and </dd> otherwise.
# There is no visible ";" branch: doBlockLevels() stores prefixes with ";"
# already replaced by ":", so ";" is not expected here and would yield the
# ERR 3 comment.
971 /* private */function closeList( $char )
973 if ( "*" == $char ) { $text = "</li></ul>"; }
974 else if ( "#" == $char ) { $text = "</li></ol>"; }
975 else if ( ":" == $char ) {
976 if ( $this->mDTopen
) {
977 $this->mDTopen
= false;
978 $text = "</dt></dl>";
980 $text = "</dd></dl>";
983 else { return "<!-- ERR 3 -->"; }
# Line-oriented block pass: wraps plain lines in <p>, indented lines in <pre>,
# and turns runs of "*", "#", ":" and ";" prefixes into (nested) lists.
# When $linestart is false the first line is copied to the output untouched.
987 /* private */ function doBlockLevels( $text, $linestart ) {
988 $fname = "Parser::doBlockLevels";
989 wfProfileIn( $fname );
991 # Parsing through the text line by line. The main thing
992 # happening here is handling of block-level elements p, pre,
993 # and making lists from lines starting with * # : etc.
995 $textLines = explode( "\n", $text );
997 $lastPrefix = $output = $lastLine = '';
998 $this->mDTopen
= $inBlockElem = false;
# $paragraphStack holds paragraph markup that is pending, i.e. decided on a
# blank line but only emitted once the next line shows it is needed.
1000 $paragraphStack = false;
1002 if ( !$linestart ) {
1003 $output .= array_shift( $textLines );
1005 foreach ( $textLines as $oLine ) {
1006 $lastPrefixLength = strlen( $lastPrefix );
1007 $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1008 $preOpenMatch = preg_match("/<pre/i", $oLine );
1009 if (!$this->mInPre
) {
1010 $this->mInPre
= !empty($preOpenMatch);
1012 if ( !$this->mInPre
) {
1013 # Multiple prefixes may abut each other for nested lists.
1014 $prefixLength = strspn( $oLine, "*#:;" );
1015 $pref = substr( $oLine, 0, $prefixLength );
# $pref2 treats ";" like ":" so that a term and its definition count as the
# same list when prefixes are compared.
1018 $pref2 = str_replace( ";", ":", $pref );
1019 $t = substr( $oLine, $prefixLength );
1021 # Don't interpret any other prefixes in preformatted text
1023 $pref = $pref2 = '';
1028 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1029 # Same as the last item, so no need to deal with nesting or opening stuff
1030 $output .= $this->nextItem( substr( $pref, -1 ) );
1031 $paragraphStack = false;
1033 if ( ";" == substr( $pref, -1 ) ) {
1034 # The one nasty exception: definition lists work like this:
1035 # ; title : definition text
1036 # So we check for : in the remainder text to split up the
1037 # title and definition, without breaking links.
1038 # FIXME: This is not foolproof. Something better in Tokenizer might help.
# NOTE(review): the second alternative in "(?:\s| )" is a literal blank here;
# presumably it was a non-breaking-space entity in the original -- confirm.
1039 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1041 $output .= $term . $this->nextItem( ":" );
1045 } elseif( $prefixLength ||
$lastPrefixLength ) {
1046 # Either open or close a level...
1047 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1048 $paragraphStack = false;
# Close every level of the previous prefix beyond the shared part, innermost
# first.
1050 while( $commonPrefixLength < $lastPrefixLength ) {
1051 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1052 --$lastPrefixLength;
# Same depth as the shared part: continue that list with a new item.
1054 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1055 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open each additional level of the new prefix, outermost first.
1057 while ( $prefixLength > $commonPrefixLength ) {
1058 $char = substr( $pref, $commonPrefixLength, 1 );
1059 $output .= $this->openList( $char );
1061 if ( ";" == $char ) {
1062 # FIXME: This is dupe of code above
1063 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1065 $output .= $term . $this->nextItem( ":" );
1069 ++
$commonPrefixLength;
1071 $lastPrefix = $pref2;
1073 if( 0 == $prefixLength ) {
1074 # No prefix (not in list)--go to paragraph mode
1075 $uniq_prefix = UNIQ_PREFIX
;
1076 // XXX: use a stack for nestable elements like span, table and div
# Lines that open or close block-level HTML (or contain a stripped <pre>
# marker) must not be wrapped in a paragraph.
1077 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1078 $closematch = preg_match(
1079 "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1080 "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1081 if ( $openmatch or $closematch ) {
1082 $paragraphStack = false;
1083 $output .= $this->closeParagraph();
1084 if($preOpenMatch and !$preCloseMatch) {
1085 $this->mInPre
= true;
1087 if ( $closematch ) {
1088 $inBlockElem = false;
1090 $inBlockElem = true;
1092 } else if ( !$inBlockElem && !$this->mInPre
) {
# A leading space on a non-blank line means preformatted text.
# NOTE(review): $t{0} on an empty string, and the curly-brace offset syntax
# itself, are problematic on current PHP versions -- confirm the target.
1093 if ( " " == $t{0} and trim($t) != '' ) {
1095 if ($this->mLastSection
!= 'pre') {
1096 $paragraphStack = false;
1097 $output .= $this->closeParagraph().'<pre>';
1098 $this->mLastSection
= 'pre';
# Blank line: either flush a pending paragraph with a <br/>, or queue
# paragraph markup for the next non-blank line.
1102 if ( '' == trim($t) ) {
1103 if ( $paragraphStack ) {
1104 $output .= $paragraphStack.'<br/>';
1105 $paragraphStack = false;
1106 $this->mLastSection
= 'p';
1108 if ($this->mLastSection
!= 'p' ) {
1109 $output .= $this->closeParagraph();
1110 $this->mLastSection
= '';
1111 $paragraphStack = "<p>";
1113 $paragraphStack = '</p><p>';
# Ordinary text line: emit any pending paragraph markup, or open a new <p>.
1117 if ( $paragraphStack ) {
1118 $output .= $paragraphStack;
1119 $paragraphStack = false;
1120 $this->mLastSection
= 'p';
1121 } else if ($this->mLastSection
!= 'p') {
1122 $output .= $this->closeParagraph().'<p>';
1123 $this->mLastSection
= 'p';
1129 if ($paragraphStack === false) {
# End of input: close any lists still open, then any open section.
# NOTE(review): the decrement of $prefixLength inside this loop is not
# visible here -- confirm it exists, otherwise the loop cannot terminate.
1133 while ( $prefixLength ) {
1134 $output .= $this->closeList( $pref2{$prefixLength-1} );
1137 if ( "" != $this->mLastSection
) {
1138 $output .= "</" . $this->mLastSection
. ">";
1139 $this->mLastSection
= "";
1142 wfProfileOut( $fname );
# Returns the current value of the built-in variable selected by $index,
# which is compared against the MAG_* constants in the cases below.
# Values come from the content language object ($wgLang), from PHP's date(),
# from the title being parsed ($this->mTitle) and from global helper functions.
1146 function getVariableValue( $index ) {
1147 global $wgLang, $wgSitename, $wgServer;
1150 case MAG_CURRENTMONTH
:
1152 case MAG_CURRENTMONTHNAME
:
# date("n") is the numeric month without leading zero.
1153 return $wgLang->getMonthName( date("n") );
1154 case MAG_CURRENTMONTHNAMEGEN
:
1155 return $wgLang->getMonthNameGen( date("n") );
1156 case MAG_CURRENTDAY
:
# Text of the title currently being parsed.
1159 return $this->mTitle
->getText();
1161 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
# Localised namespace name of the title currently being parsed.
1162 return $wgLang->getNsText($this->mTitle
->getNamespace()); // Patch by Dori
1163 case MAG_CURRENTDAYNAME
:
# date("w") is 0 (Sunday) .. 6; the +1 presumably maps it onto a
# 1-based weekday index expected by getWeekdayName() -- TODO confirm.
1164 return $wgLang->getWeekdayName( date("w")+
1 );
1165 case MAG_CURRENTYEAR
:
1167 case MAG_CURRENTTIME
:
1168 return $wgLang->time( wfTimestampNow(), false );
1169 case MAG_NUMBEROFARTICLES
:
1170 return wfNumberOfArticles();
# Rebuilds $this->mVariables from scratch: for every id in $wgVariableIDs the
# matching MagicWord object is fetched and asked to add itself to the array
# together with the value computed by getVariableValue() for that id.
# NOTE(review): the keys used by MagicWord::addToArray() are not visible here;
# braceSubstitution() looks entries up by the text between the braces.
1180 function initialiseVariables()
1182 global $wgVariableIDs;
1183 $this->mVariables
= array();
1184 foreach ( $wgVariableIDs as $id ) {
1185 $mw =& MagicWord
::get( $id );
1186 $mw->addToArray( $this->mVariables
, $this->getVariableValue( $id ) );
# Expands {{{argument}}} and {{...}} constructs in $text.
#   $text - wiki text to process
#   $args - template arguments available to {{{...}}} during this call;
#           pushed on $this->mArgStack for the duration of the call
# The actual replacement work is done by the global callbacks
# wfArgSubstitution() and wfBraceSubstitution(), which forward to the parser
# stored in $GLOBALS['wgCurParser'].
1190 /* private */ function replaceVariables( $text, $args = array() )
1192 global $wgLang, $wgScript, $wgArticlePath;
1194 $fname = "Parser::replaceVariables";
1195 wfProfileIn( $fname );
# Build the variable table lazily on first use.
1198 if ( !$this->mVariables
) {
1199 $this->initialiseVariables();
# Character class body used inside both regexes below.
1201 $titleChars = Title
::legalChars();
1203 # This function is called recursively. To keep track of arguments we need a stack:
1204 array_push( $this->mArgStack
, $args );
1206 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1207 $GLOBALS['wgCurParser'] =& $this;
1209 # Argument substitution
# Triple-brace arguments are only expanded when producing HTML.
1210 if ( $this->mOutputType
== OT_HTML
) {
1211 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1214 # Double brace substitution
# Groups: 1 = optional leading newline, 2 = name, 3 = "|args" or empty.
1215 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1216 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
# Balance the push above.
1218 array_pop( $this->mArgStack
);
1220 wfProfileOut( $fname );
# Callback body for one {{...}} match found by replaceVariables().
#   $matches - preg match array: [0] whole match, [1] optional leading newline,
#              [2] text before the first "|", [3] "|arg|arg..." or "".
# The candidate handlers are tried in the order they appear below (SUBST,
# MSGNW/MSG/INT, NS:, LOCALURL/LOCALURLE, internal variables, caller
# arguments, template text loaded through Article); $found, $nowiki and
# $noparse are flags shared between these steps.
# When producing HTML, found text is either escaped or parsed recursively
# with the cleaned-up argument list.
1224 function braceSubstitution( $matches )
1226 global $wgLinkCache, $wgLang;
1227 $fname = "Parser::braceSubstitution";
1234 # $newline is an optional newline character before the braces
1235 # $part1 is the bit before the first |, and must contain only title characters
1236 # $args is a list of arguments, starting from index 0, not including $part1
1238 $newline = $matches[1];
1239 $part1 = $matches[2];
1240 # If the third subpattern matched anything, it will start with |
1241 if ( $matches[3] !== "" ) {
# Drop the leading "|" and split the rest into positional pieces.
1242 $args = explode( "|", substr( $matches[3], 1 ) );
1246 $argc = count( $args );
# A match that still contains "{{{" is kept verbatim.
1249 if ( strpos( $matches[0], "{{{" ) !== false ) {
1250 $text = $matches[0];
# SUBST handling depends on the output type: the original text is kept when
# SUBST is present outside OT_WIKI, and when it is absent during OT_WIKI.
1257 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1258 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1259 if ( $this->mOutputType
!= OT_WIKI
) {
1260 # Invalid SUBST not replaced at PST time
1261 # Return without further processing
1262 $text = $matches[0];
1266 } elseif ( $this->mOutputType
== OT_WIKI
) {
1267 # SUBST not found in PST pass, do nothing
1268 $text = $matches[0];
1273 # MSG, MSGNW and INT
1276 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1277 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1280 # Remove obsolete MSG:
1281 $mwMsg =& MagicWord
::get( MAG_MSG
);
1282 $mwMsg->matchStartAndRemove( $part1 );
1285 # Check if it is an internal message
1286 $mwInt =& MagicWord
::get( MAG_INT
);
1287 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
# Internal messages are subject to the same inclusion limit as pages,
# counted under the key "int:<name>".
1288 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1289 $text = wfMsgReal( $part1, $args, true );
1297 # Check for NS: (namespace expansion)
1298 $mwNs = MagicWord
::get( MAG_NS
);
1299 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero numeric argument is used directly as the namespace index;
# anything else is looked up as a canonical namespace name.
1300 if ( intval( $part1 ) ) {
1301 $text = $wgLang->getNsText( intval( $part1 ) );
1304 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1305 if ( !is_null( $index ) ) {
1306 $text = $wgLang->getNsText( $index );
1313 # LOCALURL and LOCALURLE
1315 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1316 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
# $func is the name of the Title method that will build the URL.
1318 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1319 $func = 'getLocalURL';
1320 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1321 $func = 'escapeLocalURL';
1326 if ( $func !== '' ) {
1327 $title = Title
::newFromText( $part1 );
1328 if ( !is_null( $title ) ) {
# The Title method is called with the first argument or with none.
1330 $text = $title->$func( $args[0] );
1332 $text = $title->$func();
1339 # Internal variables
1340 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1341 $text = $this->mVariables
[$part1];
# Flag the output object as containing a variable expansion.
1343 $this->mOutput
->mContainsOldMagic
= true;
1346 # Arguments input from the caller
# Top of the argument stack = arguments of the enclosing replaceVariables() call.
1347 $inputArgs = end( $this->mArgStack );
1348 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1349 $text = $inputArgs[$part1];
1353 # Load from database
# Names are resolved with NS_TEMPLATE passed as second argument to newFromText().
1355 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1356 if ( !is_null( $title ) && !$title->isExternal() ) {
1357 # Check for excessive inclusion
1358 $dbk = $title->getPrefixedDBkey();
1359 if ( $this->incrementIncludeCount( $dbk ) ) {
1360 $article = new Article( $title );
1361 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1362 if ( $articleContent !== false ) {
1364 $text = $articleContent;
1369 # If the title is valid but undisplayable, make a link to it
1370 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1371 $text = "[[" . $title->getPrefixedText() . "]]";
1377 # Recursive parsing, escaping and link table handling
1378 # Only for HTML output
1379 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1380 $text = wfEscapeWikiText( $text );
1381 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1382 # Clean up argument array
# Pieces without "=" become positional entries keyed by $index; pieces with
# "=" become named entries with trimmed name and value.
1383 $assocArgs = array();
1385 foreach( $args as $arg ) {
1386 $eqpos = strpos( $arg, "=" );
1387 if ( $eqpos === false ) {
1388 $assocArgs[$index++
] = $arg;
1390 $name = trim( substr( $arg, 0, $eqpos ) );
1391 $value = trim( substr( $arg, $eqpos+
1 ) );
1392 if ( $value === false ) {
1395 if ( $name !== false ) {
1396 $assocArgs[$name] = $value;
1401 # Do not enter included links in link table
1402 if ( !is_null( $title ) ) {
1403 $wgLinkCache->suspend();
1406 # Run full parser on the included text
1407 $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
1409 # Resume the link cache and register the inclusion as a link
1410 if ( !is_null( $title ) ) {
1411 $wgLinkCache->resume();
1412 $wgLinkCache->addLinkObj( $title );
1423 # Triple brace replacement -- used for template arguments
#   $matches - preg match array from replaceVariables(): [0] whole match,
#              [1] optional leading newline, [2] argument name.
# The text defaults to the unmodified match; if the trimmed name is a key of
# the innermost argument set on $this->mArgStack, the argument's value is run
# through stripParse() with an empty argument list.
1424 function argSubstitution( $matches )
1426 $newline = $matches[1];
1427 $arg = trim( $matches[2] );
1428 $text = $matches[0];
1429 $inputArgs = end( $this->mArgStack
);
1431 if ( array_key_exists( $arg, $inputArgs ) ) {
1432 $text = $this->stripParse( $inputArgs[$arg], (bool)$newline, array() );
1438 # Returns true if the function is allowed to include this entity
#   $dbk - key identifying the included entity (callers in this file pass a
#          prefixed DB key or "int:<message>")
# Keeps one counter per key in $this->mIncludeCount, creating it at 0 on first
# use. The counter is incremented before being compared with
# MAX_INCLUDE_REPEAT, so the first MAX_INCLUDE_REPEAT requests for a key
# satisfy the condition below.
1439 function incrementIncludeCount( $dbk )
1441 if ( !array_key_exists( $dbk, $this->mIncludeCount
) ) {
1442 $this->mIncludeCount
[$dbk] = 0;
1444 if ( ++
$this->mIncludeCount
[$dbk] <= MAX_INCLUDE_REPEAT
) {
1452 # Cleans up HTML, removes dangerous tags and attributes
#   $text - text that may contain arbitrary HTML markup
# The text is split on "<"; each fragment is matched as a tag. Fragments whose
# tag name is in the approved lists are re-emitted with attributes filtered by
# fixTagAttributes(); every other fragment is emitted with its "<" and ">"
# escaped so it is displayed rather than interpreted.
# Fix: the escaping calls replaced ">" with ">" (a no-op) and re-emitted
# rejected fragments with a raw "<", which let unapproved tags through
# unchanged. They now emit &gt; / &lt;.
1453 /* private */ function removeHTMLtags( $text )
1455 global $wgUseTidy, $wgUserHtml;
1456 $fname = "Parser::removeHTMLtags";
1457 wfProfileIn( $fname );
1460 $htmlpairs = array( # Tags that must be closed
1461 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1462 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1463 "strike", "strong", "tt", "var", "div", "center",
1464 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1465 "ruby", "rt" , "rb" , "rp", "p"
1467 $htmlsingle = array(
1468 "br", "hr", "li", "dt", "dd"
1470 $htmlnest = array( # Tags that can be nested--??
1471 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1472 "dl", "font", "big", "small", "sub", "sup"
1474 $tabletags = array( # Can only appear inside table
# Alternative setup with every list empty, i.e. no tag is approved.
1478 $htmlpairs = array();
1479 $htmlsingle = array();
1480 $htmlnest = array();
1481 $tabletags = array();
# Table-only tags are treated like single tags for the closing checks;
# $htmlelements is the complete list of approved tag names.
1484 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1485 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1487 $htmlattrs = $this->getHTMLattrs () ;
1489 # Remove HTML comments
# The pattern has a single capture group, so "$2" expands to the empty string.
1490 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
# Everything before the first "<" is plain text and is kept as is.
1492 $bits = explode( "<", $text );
1493 $text = array_shift( $bits );
1495 $tagstack = array(); $tablestack = array();
1496 foreach ( $bits as $x ) {
# Notices and warnings are silenced while destructuring a possibly failed match.
1497 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1498 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1500 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1501 error_reporting( $prev );
1504 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
# Closing-tag check: the tag on top of the stack must match $t.
1508 if ( ! in_array( $t, $htmlsingle ) &&
1509 ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
1510 if(!empty($ot)) array_push( $tagstack, $ot );
# Leaving a table restores the tag stack saved when the table was opened.
1513 if ( $t == "table" ) {
1514 $tagstack = array_pop( $tablestack );
1519 # Keep track for later
1520 if ( in_array( $t, $tabletags ) &&
1521 ! in_array( "table", $tagstack ) ) {
1523 } else if ( in_array( $t, $tagstack ) &&
1524 ! in_array ( $t , $htmlnest ) ) {
1526 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table saves the current stack and starts a fresh one.
1527 if ( $t == "table" ) {
1528 array_push( $tablestack, $tagstack );
1529 $tagstack = array();
1531 array_push( $tagstack, $t );
1533 # Strip non-approved attributes from the tag
1534 $newparams = $this->fixTagAttributes($params);
# Escape stray ">" in the text following the tag.
1538 $rest = str_replace( ">", "&gt;", $rest );
1539 $text .= "<$slash$t $newparams$brace$rest";
# Fragment not emitted as an approved tag: escape it so it renders as text.
1543 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1545 # Close off any remaining tags
1546 while ( $t = array_pop( $tagstack ) ) {
1548 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1551 # this might be possible using tidy itself
# Second variant of the loop without tag-balance tracking; only the
# approved-tag filter and the escaping are applied.
1552 foreach ( $bits as $x ) {
1553 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1555 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1556 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1557 $newparams = $this->fixTagAttributes($params);
1558 $rest = str_replace( ">", "&gt;", $rest );
1559 $text .= "<$slash$t $newparams$brace$rest";
1561 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1565 wfProfileOut( $fname );
1572 * This function accomplishes several tasks:
1573 * 1) Auto-number headings if that option is enabled
1574 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1575 * 3) Add a Table of contents on the top for users who have enabled the option
1576 * 4) Auto-anchor headings
1578 * It loops through all headlines, collects the necessary data, then splits up the
1579 * string and re-inserts the newly formatted headlines.
# Post-processes the <h1>..<h6> headings of already rendered HTML.
#   $text   - HTML in which headings are located with a case-insensitive regex
#   $isMain - the table of contents is only considered for the first block
#             when this is true (see the check near the end)
# Per heading it derives a numbering prefix, an anchor name made unique with
# a "_<n>" suffix for repeated titles, a TOC line and optional edit-section
# markup; afterwards the text is split at the headings and reassembled.
1583 /* private */ function formatHeadings( $text, $isMain=true )
1585 global $wgInputEncoding;
# Feature switches come from the parser options.
1587 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
1588 $doShowToc = $this->mOptions
->getShowToc();
# Edit-section features depend on whether the title is editable by the user.
1589 if( !$this->mTitle
->userCanEdit() ) {
1591 $rightClickHack = 0;
1593 $showEditLink = $this->mOptions
->getEditSection();
1594 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
1597 # Inhibit editsection links if requested in the page
1598 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1599 if( $esw->matchAndRemove( $text ) ) {
1602 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1604 $mw =& MagicWord
::get( MAG_NOTOC
);
1605 if( $mw->matchAndRemove( $text ) ) {
1609 # never add the TOC to the Main Page. This is an entry page that should not
1610 # be more than 1-2 screens large anyway
1611 if( $this->mTitle
->getPrefixedText() == wfMsg("mainpage") ) {
1615 # Get all headlines for numbering them and adding funky stuff like [edit]
1616 # links - this is for later, but we need the number of headlines right now
# $matches[1] = level digit, [2] = rest of the opening tag including ">",
# [3] = heading contents.
1617 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1619 # if there are fewer than 4 headlines in the article, do not show TOC
1620 if( $numMatches < 4 ) {
1624 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1625 # override above conditions and always show TOC
1626 $mw =& MagicWord
::get( MAG_FORCETOC
);
1627 if ($mw->matchAndRemove( $text ) ) {
1632 # We need this to perform operations on the HTML
1633 $sk =& $this->mOptions
->getSkin();
1638 # Ugh .. the TOC should have neat indentation levels which can be
1639 # passed to the skin functions. These are determined here
# $sublevelCount[<level>] counts the headings seen at each level.
1644 $sublevelCount = array();
1647 foreach( $matches[3] as $headline ) {
1650 $prevlevel = $level;
1652 $level = $matches[1][$headlineCount];
# Going deeper: start a new counter and indent the TOC by the level difference.
1653 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
1654 # reset when we enter a new level
1655 $sublevelCount[$level] = 0;
1656 $toc .= $sk->tocIndent( $level - $prevlevel );
1657 $toclevel +
= $level - $prevlevel;
# Going back up: reset the counter one level below and unindent the TOC.
1659 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
1660 # reset when we step back a level
1661 $sublevelCount[$level+
1]=0;
1662 $toc .= $sk->tocUnindent( $prevlevel - $level );
1663 $toclevel -= $prevlevel - $level;
1665 # count number of headlines for each level
1666 @$sublevelCount[$level]++
;
# Build the numbering string from the counters of levels 1..$level,
# using only levels whose counter is non-empty.
1667 if( $doNumberHeadings ||
$doShowToc ) {
1669 for( $i = 1; $i <= $level; $i++
) {
1670 if( !empty( $sublevelCount[$i] ) ) {
1674 $numbering .= $sublevelCount[$i];
1680 # The canonized header is a version of the header text safe to use for links
1681 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1682 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
# Remove any markup, keep the trimmed text for the TOC, then URL-encode the
# entity-decoded text and collapse runs of the listed characters into "_".
1685 $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1686 $tocline = trim( $canonized_headline );
1687 $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT
, $wgInputEncoding ) ) );
1688 # strip out urlencoded (inserted for french spaces, e.g. first space in 'something : something')
1689 $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1690 $refer[$headlineCount] = $canonized_headline;
1692 # count how many in assoc. array so we can track dupes in anchors
1693 @$refers[$canonized_headline]++
;
1694 $refcount[$headlineCount]=$refers[$canonized_headline];
1696 # Prepend the number to the heading text
1698 if( $doNumberHeadings ||
$doShowToc ) {
1699 $tocline = $numbering . " " . $tocline;
1701 # Don't number the heading if it is the only one (looks silly)
1702 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1703 # the two are different if the line contains a link
1704 $headline=$numbering . " " . $headline;
1708 # Create the anchor for linking from the TOC to the section
# Second and later occurrences of the same anchor text get "_<occurrence>".
1709 $anchor = $canonized_headline;
1710 if($refcount[$headlineCount] > 1 ) {
1711 $anchor .= "_" . $refcount[$headlineCount];
1714 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
# Section numbers passed to the skin are 1-based, hence $headlineCount+1.
1716 if( $showEditLink ) {
1717 if ( empty( $head[$headlineCount] ) ) {
1718 $head[$headlineCount] = "";
1720 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
1723 # Add the edit section span
1724 if( $rightClickHack ) {
1725 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
1728 # give headline the correct <h#> tag
# $matches[2][..] already ends with ">", so it completes the opening tag.
1729 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
# Close any TOC levels still open and wrap the lines in the skin's TOC table.
1735 $toclines = $headlineCount;
1736 $toc .= $sk->tocUnindent( $toclevel );
1737 $toc = $sk->tocTable( $toc );
1740 # split up and insert constructed headlines
# Splitting on the heading pattern yields the text blocks between headings.
1742 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1745 foreach( $blocks as $block ) {
1746 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1747 # This is the [edit] link that appears for the top block of text when
1748 # section editing is enabled
1750 # Disabled because it broke block formatting
1751 # For example, a bullet point in the top line
1752 # $full .= $sk->editSectionLink(0);
# TOC handling applies to the first block ($i == 0) of the main text only.
1755 if( $doShowToc && !$i && $isMain) {
1756 # Top anchor now in skin
1760 if( !empty( $head[$i] ) ) {
# Turns "ISBN <number>" occurrences in $text into links to the Booksources
# special page.
#   $text - text to scan
# Returns $text unchanged when it contains no "ISBN " marker.
1769 /* private */ function magicISBN( $text )
# A space is prepended before splitting and removed again from the first
# piece two lines below.
1773 $a = split( "ISBN ", " $text" );
1774 if ( count ( $a ) < 2 ) return $text;
1775 $text = substr( array_shift( $a ), 1);
# Characters accepted as part of the ISBN token.
1776 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1778 foreach ( $a as $x ) {
1779 $isbn = $blank = "" ;
# Consume leading blanks of the piece.
1780 while ( " " == $x{0} ) {
1782 $x = substr( $x, 1 );
# Consume leading characters that belong to $valid.
1784 while ( strstr( $valid, $x{0} ) != false ) {
1786 $x = substr( $x, 1 );
# Hyphens and spaces are removed for the value used in the query string.
1788 $num = str_replace( "-", "", $isbn );
1789 $num = str_replace( " ", "", $num );
# Re-emit the marker unchanged (no link).
1792 $text .= "ISBN $blank$x";
# Link to Special:Booksources with the normalised number as "isbn" parameter.
1794 $titleObj = Title
::makeTitle( NS_SPECIAL
, "Booksources" );
1795 $text .= "<a href=\"" .
1796 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1797 "\" class=\"internal\">ISBN $isbn</a>";
# Turns "RFC <number>" occurrences in $text into external links built from
# the "rfcurl" message, whose "$1" placeholder is replaced by the number.
#   $text - text to scan
# Returns $text unchanged when it contains no "RFC " marker.
# Fix: the text was split on "ISBN " although the pieces are reassembled with
# "RFC ", so RFC references were never linked and ISBN references were
# rewritten as RFC ones. The split marker is now "RFC ".
1803 /* private */ function magicRFC( $text )
# A space is prepended before splitting and removed again from the first
# piece two lines below.
1807 $a = split( "RFC ", " $text" );
1808 if ( count ( $a ) < 2 ) return $text;
1809 $text = substr( array_shift( $a ), 1);
# Only digits are accepted as part of the RFC number.
1810 $valid = "0123456789";
1812 foreach ( $a as $x ) {
1813 $rfc = $blank = "" ;
# Consume leading blanks of the piece.
1814 while ( " " == $x{0} ) {
1816 $x = substr( $x, 1 );
# Consume leading digits.
1818 while ( strstr( $valid, $x{0} ) != false ) {
1820 $x = substr( $x, 1 );
# Re-emit the marker unchanged (no link).
1824 $text .= "RFC $blank$x";
# Build the link target from the "rfcurl" message.
1826 $url = wfmsg( "rfcurl" );
1827 $url = str_replace( "$1", $rfc, $url);
1828 $sk =& $this->mOptions
->getSkin();
1829 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1830 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
# Entry point for transforming wiki text before it is saved; produces wiki
# markup (output type OT_WIKI), not HTML.
#   $text       - submitted wiki text
#   $title      - Title of the page, stored by reference in $this->mTitle
#   $user       - user object, forwarded to pstPass2()
#   $options    - parser options, stored in $this->mOptions
#   $clearState - when true, clearState() is called first
# Sequence: literal and regex normalisation via the $pairs tables, strip(),
# pstPass2(), unstrip().
1836 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1838 $this->mOptions
= $options;
1839 $this->mTitle
=& $title;
1840 $this->mOutputType
= OT_WIKI
;
1842 if ( $clearState ) {
1843 $this->clearState();
1846 $stripState = false;
# First $pairs table: plain string replacements (keys => values).
1850 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
# Second $pairs table: regex => replacement, normalising <br> variants.
1853 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1854 "/<br *?>/i" => "<br/>",
1856 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
# Stripped sections are put back by unstrip() after the second pass.
1857 $text = $this->strip( $text, $stripState, false );
1858 $text = $this->pstPass2( $text, $user );
1859 $text = $this->unstrip( $text, $stripState );
# Second pass of the pre-save transform.
#   $text - wiki text (already run through strip() by preSaveTransform())
#   $user - user object providing getName() and the "nickname" option
# Steps visible here: variable replacement, expansion of ~~~ / ~~~~ / ~~~~~
# signatures, rewriting of [[|name]] and [[name (context)|]] links, trimming
# of trailing whitespace and removal of the MAG_END marker.
# Fix: signatures used to be expanded with preg_replace(), which treats "$n"
# and "\n" sequences in the replacement string as backreferences. The
# replacement contains the user name and the user-chosen nickname, so such
# sequences were silently mangled. The patterns are literal strings, so
# str_replace() gives the same matches without interpreting the replacement.
1863 /* private */ function pstPass2( $text, &$user )
1865 global $wgLang, $wgLocaltimezone, $wgCurParser;
1867 # Variable replacement
1868 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1869 $text = $this->replaceVariables( $text );
# $n = user name, $k = nickname (falls back to the user name when empty).
1873 $n = $user->getName();
1874 $k = $user->getOption( "nickname" );
1875 if ( "" == $k ) { $k = $n; }
# Temporarily switch the TZ environment variable so date() below uses the
# configured local timezone; restored after the timestamp is built.
1876 if(isset($wgLocaltimezone)) {
1877 $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1879 /* Note: this is an ugly timezone hack for the European wikis */
# $d = formatted date and time followed by the timezone abbreviation.
1880 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1881 " (" . date( "T" ) . ")";
1882 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
# Longest marker first, so shorter markers do not eat parts of longer ones.
1884 $text = str_replace( "~~~~~", $d, $text );
1885 $text = str_replace( "~~~~", "[[" . $wgLang->getNsText(
1886 Namespace::getUser() ) . ":$n|$k]] $d", $text );
1887 $text = str_replace( "~~~", "[[" . $wgLang->getNsText(
1888 Namespace::getUser() ) . ":$n|$k]]", $text );
1890 # Context links: [[|name]] and [[name (context)|]]
# $tc = title characters including parentheses, $np = the same without them.
1892 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1893 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1894 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
# Matches a title of the form "name (context)".
1895 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1897 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
1898 $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
1899 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
1900 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1901 # [[ns:page (cont)|]]
# The current page title is tested against $conpat.
1903 $t = $this->mTitle
->getText();
1904 if ( preg_match( $conpat, $t, $m ) ) {
# Fill in the link text that the trailing "|" left empty.
1907 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1908 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1909 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
# [[|page]] gains the current context as a suffix when there is one.
1911 if ( "" == $context ) {
1912 $text = preg_replace( $p2, "[[\\1]]", $text );
1914 $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1918 $mw =& MagicWord::get( MAG_SUBST );
1919 $wgCurParser = $this->fork();
1920 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1921 $this->merge( $wgCurParser );
1924 # Trim trailing whitespace
1925 # MAG_END (__END__) tag allows for trailing
1926 # whitespace to be deliberately included
1927 $text = rtrim( $text );
1928 $mw =& MagicWord
::get( MAG_END
);
1929 $mw->matchAndRemove( $text );
1934 # Set up some variables which are usually set up in parse()
1935 # so that an external function can call some class members with confidence
#   $title      - Title object, stored by reference in $this->mTitle
#   $options    - parser options, stored in $this->mOptions
#   $outputType - stored in $this->mOutputType (the file defines OT_HTML,
#                 OT_WIKI and OT_MSG as allowed values)
#   $clearState - when true, clearState() is called after the assignments
1936 function startExternalParse( &$title, $options, $outputType, $clearState = true )
1938 $this->mTitle
=& $title;
1939 $this->mOptions
= $options;
1940 $this->mOutputType
= $outputType;
1941 if ( $clearState ) {
1942 $this->clearState();
# Runs variable replacement on a message text with output type OT_MSG.
#   $text    - message text
#   $options - parser options, stored in $this->mOptions
# The parser state is reset with clearState() before replaceVariables() runs,
# and $wgTitle is used as the title context.
1946 function transformMsg( $text, $options ) {
# Function-static flag shared by all calls, used by the recursion guard.
1948 static $executing = false;
1950 # Guard against infinite recursion
1956 $this->mTitle
= $wgTitle;
1957 $this->mOptions
= $options;
1958 $this->mOutputType
= OT_MSG
;
1959 $this->clearState();
1960 $text = $this->replaceVariables( $text );
# Result container: the output text, two link lists and a flag recording
# whether the text contains variable expansions (set by
# Parser::braceSubstitution when an internal variable is substituted).
1969 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor; every argument is optional and is copied into the member of
# the same name.
1971 function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
1972 $containsOldMagic = false )
1974 $this->mText
= $text;
1975 $this->mLanguageLinks
= $languageLinks;
1976 $this->mCategoryLinks
= $categoryLinks;
1977 $this->mContainsOldMagic
= $containsOldMagic;
# Plain getters.
1980 function getText() { return $this->mText
; }
1981 function getLanguageLinks() { return $this->mLanguageLinks
; }
1982 function getCategoryLinks() { return $this->mCategoryLinks
; }
1983 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Setters delegate to wfSetVar() and return its result.
# NOTE(review): presumably wfSetVar() returns the previous value -- confirm.
1984 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
1985 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
1986 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
1987 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
# Folds the link lists and the old-magic flag of another output object into
# this one; the text is not touched.
#   $other - object exposing mLanguageLinks, mCategoryLinks, mContainsOldMagic
# Fix: category links were merged with $this->mLanguageLinks, which appended
# the (already merged) language links to the category list and dropped the
# other object's categories. They are now merged with $other->mCategoryLinks.
1989 function merge( $other ) {
1990 $this->mLanguageLinks
= array_merge( $this->mLanguageLinks
, $other->mLanguageLinks
);
1991 $this->mCategoryLinks
= array_merge( $this->mCategoryLinks
, $other->mCategoryLinks
);
# The flag is set if either side has it set.
1992 $this->mContainsOldMagic
= $this->mContainsOldMagic ||
$other->mContainsOldMagic
;
# Option container consulted by the parser through the getters below
# (e.g. getSkin(), getShowToc(), getEditSection() in formatHeadings()).
1999 # All variables are private
2000 var $mUseTeX; # Use texvc to expand <math> tags
2001 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
2002 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2003 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2004 var $mAllowExternalImages; # Allow external images inline
2005 var $mSkin; # Reference to the preferred skin
2006 var $mDateFormat; # Date format index
2007 var $mEditSection; # Create "edit section" links
2008 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2009 var $mNumberHeadings; # Automatically number headings
2010 var $mShowToc; # Show table of contents
# Plain getters, one per option.
2012 function getUseTeX() { return $this->mUseTeX
; }
2013 function getUseCategoryMagic() { return $this->mUseCategoryMagic
; }
2014 function getUseDynamicDates() { return $this->mUseDynamicDates
; }
2015 function getInterwikiMagic() { return $this->mInterwikiMagic
; }
2016 function getAllowExternalImages() { return $this->mAllowExternalImages
; }
2017 function getSkin() { return $this->mSkin
; }
2018 function getDateFormat() { return $this->mDateFormat
; }
2019 function getEditSection() { return $this->mEditSection
; }
2020 function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick
; }
2021 function getNumberHeadings() { return $this->mNumberHeadings
; }
2022 function getShowToc() { return $this->mShowToc
; }
# Setters delegate to wfSetVar(); setSkin() uses wfSetRef() instead because
# the skin is held by reference.
2024 function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX
, $x ); }
2025 function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic
, $x ); }
2026 function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates
, $x ); }
2027 function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic
, $x ); }
2028 function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages
, $x ); }
2029 function setSkin( $x ) { return wfSetRef( $this->mSkin
, $x ); }
2030 function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat
, $x ); }
2031 function setEditSection( $x ) { return wfSetVar( $this->mEditSection
, $x ); }
2032 function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick
, $x ); }
2033 function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings
, $x ); }
2034 function setShowToc( $x ) { return wfSetVar( $this->mShowToc
, $x ); }
# Factory: creates a ParserOptions object and fills it from $user via
# initialiseFromUser().
#   $user - user object, passed by reference
2036 /* static */ function newFromUser( &$user )
2038 $popts = new ParserOptions
;
2039 $popts->initialiseFromUser( $user );
# Fills every option of this object.
#   $userInput - user object (by reference); when it is empty a substitute
#                user object is used instead and marked as loaded
# Site-wide switches are copied from the $wg* globals; the skin and the
# display preferences are read from the user object.
2043 function initialiseFromUser( &$userInput )
2045 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2047 if ( !$userInput ) {
2049 $user->setLoaded( true );
2051 $user =& $userInput;
# Site configuration.
2054 $this->mUseTeX
= $wgUseTeX;
2055 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
2056 $this->mUseDynamicDates
= $wgUseDynamicDates;
2057 $this->mInterwikiMagic
= $wgInterwikiMagic;
2058 $this->mAllowExternalImages
= $wgAllowExternalImages;
# Per-user settings; the skin is bound by reference.
2059 $this->mSkin
=& $user->getSkin();
2060 $this->mDateFormat
= $user->getOption( "date" );
2061 $this->mEditSection
= $user->getOption( "editsection" );
2062 $this->mEditSectionOnRightClick
= $user->getOption( "editsectiononrightclick" );
2063 $this->mNumberHeadings
= $user->getOption( "numberheadings" );
2064 $this->mShowToc
= $user->getOption( "showtoc" );
2070 # Regex callbacks, used in Parser::replaceVariables
# preg_replace_callback() is given this function by name; it forwards the
# match array to braceSubstitution() on the parser currently stored in the
# global $wgCurParser and returns that method's result.
2071 function wfBraceSubstitution( $matches )
2073 global $wgCurParser;
2074 return $wgCurParser->braceSubstitution( $matches );
# Callback used by Parser::replaceVariables for {{{argument}}} matches;
# forwards the match array to argSubstitution() on the parser currently
# stored in the global $wgCurParser and returns that method's result.
2077 function wfArgSubstitution( $matches )
2079 global $wgCurParser;
2080 return $wgCurParser->argSubstitution( $matches );