3 // require_once('Tokenizer.php');
7 # Processes wiki markup
9 # There are two main entry points into the Parser class:
11 # produces HTML output
13 # produces altered wiki markup.
16 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
18 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
21 # $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
22 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
25 # * only within ParserOptions
27 #----------------------------------------
28 # Variable substitution O(N^2) attack
29 #-----------------------------------------
30 # Without countermeasures, it would be possible to attack the parser by saving
31 # a page filled with a large number of inclusions of large pages. The size of
32 # the generated page would be proportional to the square of the input size.
33 # Hence, we limit the number of inclusions of any given page, thus bringing any
34 # attack back to O(N).
# Caps applied to page inclusion (repeat count and size).
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); # one million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name understood by extractTags(): passing it makes that
# function strip <!-- HTML comments --> instead of <XML>-style tags.
# It must never collide with anything usable in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the unique markers used for escaping (shared by several functions)
define( 'UNIQ_PREFIX', 'NaodW29' );

# Building blocks for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# URL characters: everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Link text characters: like the URL class, except that a space is allowed
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );

# Bracketed external link: [protocol:url optional text]
# Captures: 1 = full URL, 2 = protocol, 3 = link text
define( 'EXT_LINK_BRACKETED', sprintf(
	'/\[((%s):%s+) *(%s*?)\]/S',
	URL_PROTOCOLS,
	EXT_LINK_URL_CLASS,
	EXT_LINK_TEXT_CLASS
) );

# URL that points at an image file.
# Pieces, in order: protocol, hostname and path, filename, extension
define( 'EXT_IMAGE_REGEX', sprintf(
	'/^(%s:)(%s+)\\/(%s+)\\.((?i)%s)$/S',
	HTTP_PROTOCOLS,
	EXT_LINK_URL_CLASS,
	EXT_IMAGE_FNAME_CLASS,
	EXT_IMAGE_EXTENSIONS
) );
74 # Cleared with clearState():
75 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
76 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
79 var $mOptions, $mTitle, $mOutputType,
80 $mTemplates, // cache of already loaded templates, avoids
81 // multiple SQL queries for the same string
82 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
83 // in this path. Used for loop detection.
86 $this->mTemplates
= array();
87 $this->mTemplatePath
= array();
88 $this->mTagHooks
= array();
# Reset all per-parse state so the parser object can be reused.
function clearState() {
	# Fresh container for the result of the next parse
	$this->mOutput = new ParserOutput();

	# Collections filled while parsing
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Counters, flags and scalars
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
}
104 # First pass--just handle <nowiki> sections, pass the rest off
105 # to internalParse() which does all the real work.
107 # Returns a ParserOutput
109 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
111 $fname = "Parser::parse";
112 wfProfileIn( $fname );
118 $this->mOptions
= $options;
119 $this->mTitle
=& $title;
120 $this->mOutputType
= OT_HTML
;
123 $text = $this->strip( $text, $this->mStripState
);
124 $text = $this->internalParse( $text, $linestart );
125 $text = $this->unstrip( $text, $this->mStripState
);
126 # Clean up special characters, only run once, next-to-last before doBlockLevels
129 # french spaces, last one Guillemet-left
130 # only if there is something before the space
131 '/ (?=\\?|:|;|!|\\302\\273)/' => ' \\1',
132 '/(\d) (?=\d{3}\D)/' => '\\1 \\2',
133 # french spaces, Guillemet-right
134 "/(\\302\\253) /"=>"\\1 ",
135 '/<hr *>/i' => '<hr />',
136 '/<br *>/i' => '<br />',
137 '/<center *>/i' => '<div class="center">',
138 '/<\\/center *>/i' => '</div>',
139 # Clean up spare ampersands; note that we probably ought to be
140 # more careful about named entities.
141 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
143 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
146 # french spaces, last one Guillemet-left
147 '/ (?=\\?|:|;|!|\\302\\273)/' => ' \\1',
148 '/(\d) (?=\d{3}\D)/' => '\\1 \\2',
149 # french spaces, Guillemet-right
150 '/(\\302\\253) /' => '\\1 ',
151 '/([^> ]+(0(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
152 '/<center *>/i' => '<div class="center">',
153 '/<\\/center *>/i' => '</div>'
155 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
158 $text = $this->doBlockLevels( $text, $linestart );
159 $text = $this->unstripNoWiki( $text, $this->mStripState
);
161 $text = $this->tidy($text);
163 $this->mOutput
->setText( $text );
164 wfProfileOut( $fname );
165 return $this->mOutput
;
# Build a random lowercase-hex string from two independent mt_rand() draws.
# Used to make strip markers unique; not suitable as a security token.
/* static */ function getRandomString() {
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
172 # Replaces all occurrences of <$tag>content</$tag> in the text
173 # with a random marker and returns the new text. the output parameter
174 # $content will be an associative array filled with data on the form
175 # $unique_marker => content.
177 # If $content is already set, the additional entries will be appended
179 # If $tag is set to STRIP_COMMENTS, the function will extract
180 # <!-- HTML comments -->
182 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
183 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
190 while ( '' != $text ) {
191 if($tag==STRIP_COMMENTS
) {
192 $p = preg_split( '/<!--/i', $text, 2 );
194 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
197 if ( ( count( $p ) < 2 ) ||
( '' == $p[1] ) ) {
200 if($tag==STRIP_COMMENTS
) {
201 $q = preg_split( '/-->/i', $p[1], 2 );
203 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
205 $marker = $rnd . sprintf('%08X', $n++
);
206 $content[$marker] = $q[0];
207 $stripped .= $marker;
214 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
215 # If $render is set, performs necessary rendering operations on plugins
216 # Returns the text, and fills an array with data needed in unstrip()
217 # If the $state is already a valid strip state, it adds to the state
219 # When $stripcomments is set, HTML comments <!-- like this -->
220 # will be stripped in addition to other tags. This is important
221 # for section editing, where these comments cause confusion when
222 # counting the sections in the wikisource
223 function strip( $text, &$state, $stripcomments = false ) {
224 $render = ($this->mOutputType
== OT_HTML
);
225 $html_content = array();
226 $nowiki_content = array();
227 $math_content = array();
228 $pre_content = array();
229 $comment_content = array();
230 $ext_content = array();
232 # Replace any instances of the placeholders
233 $uniq_prefix = UNIQ_PREFIX
;
234 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
239 $text = Parser
::extractTags('html', $text, $html_content, $uniq_prefix);
240 foreach( $html_content as $marker => $content ) {
242 # Raw and unchecked for validity.
243 $html_content[$marker] = $content;
245 $html_content[$marker] = "<html>$content</html>";
251 $text = Parser
::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
252 foreach( $nowiki_content as $marker => $content ) {
254 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
256 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
261 $text = Parser
::extractTags('math', $text, $math_content, $uniq_prefix);
262 foreach( $math_content as $marker => $content ){
264 if( $this->mOptions
->getUseTeX() ) {
265 $math_content[$marker] = renderMath( $content );
267 $math_content[$marker] = "<math>$content<math>";
270 $math_content[$marker] = "<math>$content</math>";
275 $text = Parser
::extractTags('pre', $text, $pre_content, $uniq_prefix);
276 foreach( $pre_content as $marker => $content ){
278 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
280 $pre_content[$marker] = "<pre>$content</pre>";
286 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
287 foreach( $comment_content as $marker => $content ){
288 $comment_content[$marker] = "<!--$content-->";
293 foreach ( $this->mTagHooks
as $tag => $callback ) {
294 $ext_contents[$tag] = array();
295 $text = Parser
::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
296 foreach( $ext_content[$tag] as $marker => $content ) {
298 $ext_content[$tag][$marker] = $callback( $content );
300 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
305 # Merge state with the pre-existing state, if there is one
307 $state['html'] = $state['html'] +
$html_content;
308 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
309 $state['math'] = $state['math'] +
$math_content;
310 $state['pre'] = $state['pre'] +
$pre_content;
311 $state['comment'] = $state['comment'] +
$comment_content;
313 foreach( $ext_content as $tag => $array ) {
314 if ( array_key_exists( $tag, $state ) ) {
315 $state[$tag] = $state[$tag] +
$array;
320 'html' => $html_content,
321 'nowiki' => $nowiki_content,
322 'math' => $math_content,
323 'pre' => $pre_content,
324 'comment' => $comment_content,
# Put stripped content back in place of its unique markers.
#
# $text  : text containing markers
# $state : strip state, an array of category => ( marker => content )
# Returns the text with every marker expanded, except those in the
# 'nowiki' and 'html' categories.
#
# always call unstripNoWiki() after this one
function unstrip( $text, &$state ) {
	# Nothing was ever stripped: nothing to expand
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating over reversed copies leaves the caller's array pointers alone
	# and does not stop early when a stored value happens to be false.
	foreach ( array_reverse( $state, true ) as $category => $contentDict ) {
		# Strict comparison, so a numeric key is never mistaken for a name
		if ( $category === 'nowiki' || $category === 'html' ) {
			continue;
		}
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
344 # always call this after unstrip() to preserve the order
345 function unstripNoWiki( $text, &$state ) {
346 # Must expand in reverse order, otherwise nested tags will be corrupted
347 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
348 $text = str_replace( key( $state['nowiki'] ), $content, $text );
353 for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
354 $text = str_replace( key( $state['html'] ), $content, $text );
361 # Add an item to the strip state
362 # Returns the unique tag which must be inserted into the stripped text
363 # The tag will be replaced with the original text in unstrip()
364 function insertStripItem( $text, &$state ) {
365 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
374 $state['item'][$rnd] = $text;
# Generate the list of subcategories and pages for a category.
# The 'usenewcategorypage' message selects the implementation: when its
# text starts with "0" the old code is used, otherwise the new code.
# The new code will not work properly for some languages due to sorting
# issues, so those may prefer to switch it off through that message.
function categoryMagic() {
	$msg = wfMsg( 'usenewcategorypage' );

	# Look at the first character without indexing into an empty string
	# (the previous form needed the @ operator to hide that notice).
	if ( is_string( $msg ) && substr( $msg, 0, 1 ) === '0' ) {
		return $this->oldCategoryMagic();
	}

	return $this->newCategoryMagic();
}
394 # This method generates the list of subcategories and pages for a category
395 function oldCategoryMagic () {
397 $fname = 'Parser::oldCategoryMagic';
399 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
401 if ( $this->mTitle
->getNamespace() != NS_CATEGORY
) return "" ; # This ain't a category page
403 $r = "<br style=\"clear:both;\"/>\n";
406 $sk =& $this->mOptions
->getSkin() ;
408 $articles = array() ;
409 $children = array() ;
411 $id = $this->mTitle
->getArticleID() ;
414 $dbr =& wfGetDB( DB_SLAVE
);
415 $cur = $dbr->tableName( 'cur' );
416 $categorylinks = $dbr->tableName( 'categorylinks' );
418 $t = $dbr->strencode( $this->mTitle
->getDBKey() );
419 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
420 "WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
421 $res = $dbr->query( $sql, $fname ) ;
422 while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;
424 # For all pages that link to this category
425 foreach ( $data AS $x )
427 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
428 if ( $t != '' ) $t .= ':' ;
429 $t .= $x->cur_title
;
431 if ( $x->cur_namespace
== NS_CATEGORY
) {
432 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
434 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
437 $dbr->freeResult ( $res ) ;
439 # Showing subcategories
440 if ( count ( $children ) > 0 ) {
441 $r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
442 $r .= implode ( ', ' , $children ) ;
445 # Showing pages in this category
446 if ( count ( $articles ) > 0 ) {
447 $ti = $this->mTitle
->getText() ;
448 $h = wfMsg( 'category_header', $ti );
449 $r .= "<h2>$h</h2>\n" ;
450 $r .= implode ( ', ' , $articles ) ;
456 function newCategoryMagic () {
458 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
460 if ( $this->mTitle
->getNamespace() != NS_CATEGORY
) return '' ; # This ain't a category page
462 $r = "<br style=\"clear:both;\"/>\n";
465 $sk =& $this->mOptions
->getSkin() ;
467 $articles = array() ;
468 $articles_start_char = array();
469 $children = array() ;
470 $children_start_char = array();
472 $id = $this->mTitle
->getArticleID() ;
475 $dbr =& wfGetDB( DB_SLAVE
);
476 $cur = $dbr->tableName( 'cur' );
477 $categorylinks = $dbr->tableName( 'categorylinks' );
479 $t = $dbr->strencode( $this->mTitle
->getDBKey() );
480 $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
481 "$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
482 $res = $dbr->query ( $sql ) ;
483 while ( $x = $dbr->fetchObject ( $res ) )
485 $t = $ns = $wgLang->getNsText ( $x->cur_namespace
) ;
486 if ( $t != '' ) $t .= ':' ;
487 $t .= $x->cur_title
;
489 if ( $x->cur_namespace
== NS_CATEGORY
) {
490 $ctitle = str_replace( '_',' ',$x->cur_title
);
491 array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory
493 // If there's a link from Category:A to Category:B, the sortkey of the resulting
494 // entry in the categorylinks table is Category:A, not A, which it SHOULD be.
495 // Workaround: If sortkey == "Category:".$title, then use $title for sorting,
496 // else use sortkey...
497 if ( ($ns.':'.$ctitle) == $x->cl_sortkey
) {
498 array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title
) );
500 array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey
) ) ;
503 array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
504 array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey
) ) ;
507 $dbr->freeResult ( $res ) ;
509 $ti = $this->mTitle
->getText() ;
511 # Don't show subcategories section if there are none.
512 if ( count ( $children ) > 0 )
514 # Showing subcategories
515 $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
517 $numchild = count( $children );
519 $r .= wfMsg( 'subcategorycount1', 1 );
521 $r .= wfMsg( 'subcategorycount' , $numchild );
525 if ( count ( $children ) > 6 ) {
527 // divide list into three equal chunks
528 $chunk = (int) (count ( $children ) / 3);
530 // get and display header
531 $r .= '<table width="100%"><tr valign="top">';
536 // loop through the chunks
537 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
539 $chunkIndex++
, $startChunk = $endChunk, $endChunk +
= $chunk +
1)
543 // output all subcategories to category
544 for ($index = $startChunk ;
545 $index < $endChunk && $index < count($children);
548 // check for change of starting letter or beginning of chunk
549 if ( ($children_start_char[$index] != $children_start_char[$index - 1])
550 ||
($index == $startChunk) )
552 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
555 $r .= "<li>{$children[$index]}</li>";
561 $r .= '</tr></table>';
563 // for short lists of subcategories to category.
565 $r .= "<h3>{$children_start_char[0]}</h3>\n";
566 $r .= '<ul><li>'.$children[0].'</li>';
567 for ($index = 1; $index < count($children); $index++
)
569 if ($children_start_char[$index] != $children_start_char[$index - 1])
571 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
574 $r .= "<li>{$children[$index]}</li>";
578 } # END of if ( count($children) > 0 )
580 $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
582 $numart = count( $articles );
584 $r .= wfMsg( 'categoryarticlecount1', 1 );
586 $r .= wfMsg( 'categoryarticlecount' , $numart );
590 # Showing articles in this category
591 if ( count ( $articles ) > 6) {
592 $ti = $this->mTitle
->getText() ;
594 // divide list into three equal chunks
595 $chunk = (int) (count ( $articles ) / 3);
597 // get and display header
598 $r .= '<table width="100%"><tr valign="top">';
600 // loop through the chunks
601 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
603 $chunkIndex++
, $startChunk = $endChunk, $endChunk +
= $chunk +
1)
608 // output all articles in category
609 for ($index = $startChunk ;
610 $index < $endChunk && $index < count($articles);
613 // check for change of starting letter or beginning of chunk
614 if ( ($articles_start_char[$index] != $articles_start_char[$index - 1])
615 ||
($index == $startChunk) )
617 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
620 $r .= "<li>{$articles[$index]}</li>";
626 $r .= '</tr></table>';
627 } elseif ( count($articles) > 0) {
628 // for short lists of articles in categories.
629 $ti = $this->mTitle
->getText() ;
631 $r .= '<h3>'.$articles_start_char[0]."</h3>\n";
632 $r .= '<ul><li>'.$articles[0].'</li>';
633 for ($index = 1; $index < count($articles); $index++
)
635 if ($articles_start_char[$index] != $articles_start_char[$index - 1])
637 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
640 $r .= "<li>{$articles[$index]}</li>";
# Return allowed HTML attributes
# The result is a flat list of lowercase attribute names; nothing
# script-related is on it. Each name appears exactly once.
function getHTMLattrs () {
	return array( # Allowed attributes--no scripting, etc.
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
		'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
		/* FONT */ 'type', 'start', 'value', 'compact',
		/* For various lists, mostly deprecated but safe */
		'summary', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan', /* Tables */
		'id', 'class', 'name', 'style' /* For CSS */
	);
}
666 # Remove non approved attributes and javascript in css
667 function fixTagAttributes ( $t ) {
668 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
669 $htmlattrs = $this->getHTMLattrs() ;
671 # Strip non-approved attributes from the tag
673 '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
674 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
677 $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557
679 # Strip javascript "expression" from stylesheets. Brute force approach:
680 # If anything offensive is found, all attributes of the HTML tag are dropped
683 '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
684 wfMungeToUtf8( $t ) ) )
692 # interface with html tidy, used if $wgUseTidy = true
693 function tidy ( $text ) {
694 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
695 global $wgInputEncoding, $wgOutputEncoding;
696 $fname = 'Parser::tidy';
697 wfProfileIn( $fname );
700 switch(strtoupper($wgOutputEncoding)) {
702 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
705 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
708 $wgTidyOpts .= ' -raw';
711 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
712 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
713 '<head><title>test</title></head><body>'.$text.'</body></html>';
714 $descriptorspec = array(
715 0 => array('pipe', 'r'),
716 1 => array('pipe', 'w'),
717 2 => array('file', '/dev/null', 'a')
719 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
720 if (is_resource($process)) {
721 fwrite($pipes[0], $wrappedtext);
723 while (!feof($pipes[1])) {
724 $cleansource .= fgets($pipes[1], 1024);
727 $return_value = proc_close($process);
730 wfProfileOut( $fname );
732 if( $cleansource == '' && $text != '') {
733 wfDebug( "Tidy error detected!\n" );
734 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
740 # parse the wiki syntax used to render tables
741 function doTableStuff ( $t ) {
742 $fname = 'Parser::doTableStuff';
743 wfProfileIn( $fname );
745 $t = explode ( "\n" , $t ) ;
746 $td = array () ; # Is currently a td tag open?
747 $ltd = array () ; # Was it TD or TH?
748 $tr = array () ; # Is currently a tr tag open?
749 $ltr = array () ; # tr attributes
750 $indent_level = 0; # indent level of the table
751 foreach ( $t AS $k => $x )
754 $fc = substr ( $x , 0 , 1 ) ;
755 if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
756 $indent_level = strlen( $matches[1] );
758 str_repeat( "<dl><dd>", $indent_level ) .
759 "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
760 array_push ( $td , false ) ;
761 array_push ( $ltd , '' ) ;
762 array_push ( $tr , false ) ;
763 array_push ( $ltr , '' ) ;
765 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
766 else if ( '|}' == substr ( $x , 0 , 2 ) ) {
768 $l = array_pop ( $ltd ) ;
769 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
770 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
772 $t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
774 else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
775 $x = substr ( $x , 1 ) ;
776 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
778 $l = array_pop ( $ltd ) ;
779 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
780 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
783 array_push ( $tr , false ) ;
784 array_push ( $td , false ) ;
785 array_push ( $ltd , '' ) ;
786 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
788 else if ( '|' == $fc ||
'!' == $fc ||
'|+' == substr ( $x , 0 , 2 ) ) { # Caption
789 if ( '|+' == substr ( $x , 0 , 2 ) ) {
791 $x = substr ( $x , 1 ) ;
793 $after = substr ( $x , 1 ) ;
794 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
795 $after = explode ( '||' , $after ) ;
797 foreach ( $after AS $theline )
802 $tra = array_pop ( $ltr ) ;
803 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
804 array_push ( $tr , true ) ;
805 array_push ( $ltr , '' ) ;
808 $l = array_pop ( $ltd ) ;
809 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
810 if ( $fc == '|' ) $l = 'td' ;
811 else if ( $fc == '!' ) $l = 'th' ;
812 else if ( $fc == '+' ) $l = 'caption' ;
814 array_push ( $ltd , $l ) ;
815 $y = explode ( '|' , $theline , 2 ) ;
816 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
817 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
819 array_push ( $td , true ) ;
824 # Closing open td, tr && table
825 while ( count ( $td ) > 0 )
827 if ( array_pop ( $td ) ) $t[] = '</td>' ;
828 if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
832 $t = implode ( "\n" , $t ) ;
833 # $t = $this->removeHTMLtags( $t );
834 wfProfileOut( $fname );
# Run a fragment through strip() and internalParse() (as a non-main parse),
# then store the result in the strip state.
# Returns $newline followed by the marker that stands for the result.
function stripParse( $text, $newline, $args ) {
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
846 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
847 $fname = 'Parser::internalParse';
848 wfProfileIn( $fname );
850 $text = $this->removeHTMLtags( $text );
851 $text = $this->replaceVariables( $text, $args );
853 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
855 $text = $this->doHeadings( $text );
856 if($this->mOptions
->getUseDynamicDates()) {
857 global $wgDateFormatter;
858 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
860 $text = $this->doAllQuotes( $text );
861 $text = $this->replaceExternalLinks( $text );
862 $text = $this->doMagicLinks( $text );
863 $text = $this->replaceInternalLinks ( $text );
864 $text = $this->replaceInternalLinks ( $text );
866 $text = $this->unstrip( $text, $this->mStripState
);
867 $text = $this->unstripNoWiki( $text, $this->mStripState
);
869 $text = $this->doTableStuff( $text );
870 $text = $this->formatHeadings( $text, $isMain );
871 $sk =& $this->mOptions
->getSkin();
872 $text = $sk->transformContent( $text );
874 if ( $isMain && !isset ( $this->categoryMagicDone
) ) {
875 $text .= $this->categoryMagic () ;
876 $this->categoryMagicDone
= true ;
879 wfProfileOut( $fname );
# Apply the "magic link" passes to $text in place: ISBN first, then GEO
# (only when $wgUseGeoMode is set and true), then RFC.
# Returns a reference to the modified text.
/* private */ function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$geoEnabled = !empty( $wgUseGeoMode );

	$text = $this->magicISBN( $text );
	if ( $geoEnabled ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
# Parse ^^ tokens and return html
# Every ^^...^^ pair becomes <small><sup>...</sup></small>.
/* private */ function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );
	# Non-greedy, so two exponents on the same line stay two separate spans
	# instead of being merged into one that swallows the text between them.
	$text = preg_replace( '/\^\^(.*?)\^\^/', '<small><sup>\\1</sup></small>', $text );
	wfProfileOut( $fname );
	return $text;
}
# Convert wiki headings (= ... = up to ====== ... ======) into <h1>..<h6>.
# Returns the converted text.
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	# Longest delimiter first, so "==" never eats part of a "===" heading
	$level = 6;
	while ( $level >= 1 ) {
		$delim = str_repeat( '=', $level );
		$pattern = "/^{$delim}(.+){$delim}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}

	wfProfileOut( $fname );
	return $text;
}
# Apply doQuotes() to every line of $text separately and join the
# results again with newlines.
# Always returns a string, including for empty input.
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	# implode() rather than "append newline, then chop the last character":
	# chopping a one-character buffer with substr() gives false, not '',
	# on PHP versions before 8.
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
928 /* private */ function doQuotes( $text ) {
929 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
930 if (count ($arr) == 1)
934 # First, do some preliminary work. This may shift some apostrophes from
935 # being mark-up to being text. It also counts the number of occurrences
936 # of bold and italics mark-ups.
944 # If there are ever four apostrophes, assume the first is supposed to
945 # be text, and the remaining three constitute mark-up for bold text.
946 if (strlen ($arr[$i]) == 4)
951 # If there are more than 5 apostrophes in a row, assume they're all
952 # text except for the last 5.
953 else if (strlen ($arr[$i]) > 5)
955 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
958 # Count the number of occurrences of bold and italics mark-ups.
959 # We are not counting sequences of five apostrophes.
960 if (strlen ($arr[$i]) == 2) $numitalics++
; else
961 if (strlen ($arr[$i]) == 3) $numbold++
; else
962 if (strlen ($arr[$i]) == 5) { $numitalics++
; $numbold++
; }
967 # If there is an odd number of both bold and italics, it is likely
968 # that one of the bold ones was meant to be an apostrophe followed
969 # by italics. Which one we cannot know for certain, but it is more
970 # likely to be one that has a single-letter word before it.
971 if (($numbold %
2 == 1) && ($numitalics %
2 == 1))
974 $firstsingleletterword = -1;
975 $firstmultiletterword = -1;
979 if (($i %
2 == 1) and (strlen ($r) == 3))
981 $x1 = substr ($arr[$i-1], -1);
982 $x2 = substr ($arr[$i-1], -2, 1);
984 if ($firstspace == -1) $firstspace = $i;
985 } else if ($x2 == " ") {
986 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
988 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
994 # If there is a single-letter word, use it!
995 if ($firstsingleletterword > -1)
997 $arr [ $firstsingleletterword ] = "''";
998 $arr [ $firstsingleletterword-1 ] .= "'";
1000 # If not, but there's a multi-letter word, use that one.
1001 else if ($firstmultiletterword > -1)
1003 $arr [ $firstmultiletterword ] = "''";
1004 $arr [ $firstmultiletterword-1 ] .= "'";
1006 # ... otherwise use the first one that has neither.
1007 # (notice that it is possible for all three to be -1 if, for example,
1008 # there is only one pentuple-apostrophe in the line)
1009 else if ($firstspace > -1)
1011 $arr [ $firstspace ] = "''";
1012 $arr [ $firstspace-1 ] .= "'";
1016 # Now let's actually convert our apostrophic mush to HTML!
1021 foreach ($arr as $r)
1025 if ($state == 'both')
1032 if (strlen ($r) == 2)
1035 { $output .= "</em>"; $state = ''; }
1036 else if ($state == 'strongem')
1037 { $output .= "</em>"; $state = 'strong'; }
1038 else if ($state == 'emstrong')
1039 { $output .= "</strong></em><strong>"; $state = 'strong'; }
1040 else if ($state == 'both')
1041 { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
1042 else # $state can be 'strong' or ''
1043 { $output .= "<em>"; $state .= 'em'; }
1045 else if (strlen ($r) == 3)
1047 if ($state == 'strong')
1048 { $output .= "</strong>"; $state = ''; }
1049 else if ($state == 'strongem')
1050 { $output .= "</em></strong><em>"; $state = 'em'; }
1051 else if ($state == 'emstrong')
1052 { $output .= "</strong>"; $state = 'em'; }
1053 else if ($state == 'both')
1054 { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
1055 else # $state can be 'em' or ''
1056 { $output .= "<strong>"; $state .= 'strong'; }
1058 else if (strlen ($r) == 5)
1060 if ($state == 'strong')
1061 { $output .= "</strong><em>"; $state = 'em'; }
1062 else if ($state == 'em')
1063 { $output .= "</em><strong>"; $state = 'strong'; }
1064 else if ($state == 'strongem')
1065 { $output .= "</em></strong>"; $state = ''; }
1066 else if ($state == 'emstrong')
1067 { $output .= "</strong></em>"; $state = ''; }
1068 else if ($state == 'both')
1069 { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
1070 else # ($state == '')
1071 { $buffer = ''; $state = 'both'; }
1076 # Now close all remaining tags. Notice that the order is important.
1077 if ($state == 'strong' ||
$state == 'emstrong')
1078 $output .= '</strong>';
1079 if ($state == 'em' ||
$state == 'strongem' ||
$state == 'emstrong')
1081 if ($state == 'strongem')
1082 $output .= '</strong>';
1083 if ($state == 'both')
1084 $output .= "<strong><em>{$buffer}</em></strong>";
1089 # Note: we have to do external links before the internal ones,
1090 # and otherwise take great care in the order of things here, so
1091 # that we don't end up interpreting some URLs twice.
1093 /* private */ function replaceExternalLinks( $text ) {
1094 $fname = 'Parser::replaceExternalLinks';
1095 wfProfileIn( $fname );
1097 $sk =& $this->mOptions
->getSkin();
1098 $linktrail = wfMsg('linktrail');
1099 $bits = preg_split( EXT_LINK_BRACKETED
, $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
1101 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
1104 while ( $i<count( $bits ) ) {
1106 $protocol = $bits[$i++
];
1107 $text = $bits[$i++
];
1108 $trail = $bits[$i++
];
1110 # If the link text is an image URL, replace it with an <img> tag
1111 # This happened by accident in the original parser, but some people used it extensively
1112 $img = $this->maybeMakeImageLink( $text );
1113 if ( $img !== false ) {
1119 # No link text, e.g. [http://domain.tld/some.link]
1120 if ( $text == '' ) {
1121 # Autonumber if allowed
1122 if ( strpos( HTTP_PROTOCOLS
, $protocol ) !== false ) {
1123 $text = "[" . ++
$this->mAutonumber
. "]";
1125 # Otherwise just use the URL
1126 $text = htmlspecialchars( $url );
1129 # Have link text, e.g. [http://domain.tld/some.link text]s
1131 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1137 $encUrl = htmlspecialchars( $url );
1138 # Bit in parentheses showing the URL for the printable version
1139 if( $url == $text ||
preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $url ) ) {
1142 # Expand the URL for printable version
1143 if ( ! $sk->suppressUrlExpansion() ) {
1144 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1150 # Process the trail (i.e. everything after this link up until start of the next link),
1151 # replacing any non-bracketed links
1152 $trail = $this->replaceFreeExternalLinks( $trail );
1154 $la = $sk->getExternalLinkAttributes( $url, $text );
1156 # Use the encoded URL
1157 # This means that users can paste URLs directly into the text
1158 # Funny characters like ö aren't valid in URLs anyway
1159 # This was changed in August 2004
1160 $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
1163 wfProfileOut( $fname );
1167 # Replace anything that looks like a URL with a link
# The text is split on any URL_PROTOCOLS entry followed by ':' with the
# delimiter captured, so once the leading chunk has been shifted into $s,
# $bits alternates between a protocol and the text that follows it.
1168 function replaceFreeExternalLinks( $text ) {
1169 $bits = preg_split( '/((?:'.URL_PROTOCOLS
.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
1170 $s = array_shift( $bits );
1173 $sk =& $this->mOptions
->getSkin();
1175 while ( $i < count( $bits ) ){
# Consume one protocol / remainder pair per iteration.
1176 $protocol = $bits[$i++
];
1177 $remainder = $bits[$i++
];
# $m[1] is the leading run of EXT_LINK_URL_CLASS characters, $m[2] the rest.
1179 if ( preg_match( '/^('.EXT_LINK_URL_CLASS
.'+)(.*)$/s', $remainder, $m ) ) {
1180 # Found some characters after the protocol that look promising
1181 $url = $protocol . $m[1];
1184 # Move trailing punctuation to $trail
1186 # If there is no left bracket, then consider right brackets fair game too
1187 if ( strpos( $url, '(' ) === false ) {
# Reversing the URL lets strspn() count how many characters at its END are
# separator characters.
1191 $numSepChars = strspn( strrev( $url ), $sep );
1192 if ( $numSepChars ) {
1193 $trail = substr( $url, -$numSepChars ) . $trail;
1194 $url = substr( $url, 0, -$numSepChars );
1197 # Replace & from obsolete syntax with &
# NOTE(review): search and replacement are the same string here, so this call
# cannot change $url. The comment above suggests the search string was meant
# to be the HTML-entity form of the ampersand -- confirm.
1198 $url = str_replace( '&', '&', $url );
1200 # Is this an external image?
1201 $text = $this->maybeMakeImageLink( $url );
1202 if ( $text === false ) {
1203 # Not an image, make a link
1204 $text = $sk->makeExternalLink( $url, $url );
1206 $s .= $text . $trail;
# Fallback: emit the protocol and remainder unchanged.
1208 $s .= $protocol . $remainder;
1214 # make an image if it's allowed
# Builds image markup via the skin only when the parser options allow external
# images AND $url matches EXT_IMAGE_REGEX. Callers in this file compare the
# result against false to detect "not an image".
1215 function maybeMakeImageLink( $url ) {
1216 $sk =& $this->mOptions
->getSkin();
1218 if ( $this->mOptions
->getAllowExternalImages() ) {
1219 if ( preg_match( EXT_IMAGE_REGEX
, $url ) ) {
# The URL is HTML-escaped before it is handed to the skin.
1221 $text = $sk->makeImage( htmlspecialchars( $url ) );
1227 # The wikilinks [[ ]] are processed here.
1228 /* private */ function replaceInternalLinks( $s ) {
1229 global $wgLang, $wgLinkCache;
1230 global $wgNamespacesWithSubpages, $wgLanguageCode;
1231 static $fname = 'Parser::replaceInternalLinks' ;
1232 wfProfileIn( $fname );
1234 wfProfileIn( $fname.'-setup' );
1236 # the % is needed to support urlencoded titles as well
1237 if ( !$tc ) { $tc = Title
::legalChars() . '#%'; }
1238 $sk =& $this->mOptions
->getSkin();
1240 $redirect = MagicWord
::get ( MAG_REDIRECT
) ;
1242 $a = explode( '[[', ' ' . $s );
1243 $s = array_shift( $a );
1244 $s = substr( $s, 1 );
1246 # Match a link having the form [[namespace:link|alternate]]trail
1248 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1249 # Match the end of a line for a word that's not followed by whitespace,
1250 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1251 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1253 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1254 # Special and Media are pseudo-namespaces; no pages actually exist in them
1256 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
1258 if ( $useLinkPrefixExtension ) {
1259 if ( preg_match( $e2, $s, $m ) ) {
1260 $first_prefix = $m[2];
1263 $first_prefix = false;
1269 wfProfileOut( $fname.'-setup' );
1271 # start processing each line
1272 foreach ( $a as $line ) {
1273 wfProfileIn( $fname.'-prefixhandling' );
1274 if ( $useLinkPrefixExtension ) {
1275 if ( preg_match( $e2, $s, $m ) ) {
1283 $prefix = $first_prefix;
1284 $first_prefix = false;
1287 wfProfileOut( $fname.'-prefixhandling' );
1289 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1291 # fix up urlencoded title texts
1292 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1294 } else { # Invalid form; output directly
1295 $s .= $prefix . '[[' . $line ;
1301 # :Foobar -- override special treatment of prefix (images, language links)
1302 # /Foobar -- convert to CurrentPage/Foobar
1303 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1305 # Look at the first character
1306 $c = substr($m[1],0,1);
1307 $noforce = ($c != ':');
1311 # / at end means we don't want the slash to be shown
1312 if(substr($m[1],-1,1)=='/') {
1313 $m[1]=substr($m[1],1,strlen($m[1])-2);
1316 $noslash=substr($m[1],1);
1319 # Some namespaces don't allow subpages
1320 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) {
1321 # subpages allowed here
1322 $link = $this->mTitle
->getPrefixedText(). '/' . trim($noslash);
1325 } # this might be changed for ugliness reasons
1327 # no subpage allowed, use standard link
1331 } elseif( $noforce ) { # no subpage
1334 # We don't want to keep the first character
1335 $link = substr( $m[1], 1 );
1338 $wasblank = ( '' == $text );
1339 if( $wasblank ) $text = $link;
1341 $nt = Title
::newFromText( $link );
1343 $s .= $prefix . '[[' . $line;
1347 $ns = $nt->getNamespace();
1348 $iw = $nt->getInterWiki();
1350 # Link not escaped by : , create the various objects
1354 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1355 array_push( $this->mOutput
->mLanguageLinks
, $nt->getFullText() );
1356 $tmp = $prefix . $trail ;
1357 $s .= (trim($tmp) == '')?
'': $tmp;
1361 if ( $ns == NS_IMAGE
) {
1362 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1363 $wgLinkCache->addImageLinkObj( $nt );
1367 if ( $ns == NS_CATEGORY
) {
1368 $t = $nt->getText() ;
1369 $nnt = Title
::newFromText ( Namespace::getCanonicalName(NS_CATEGORY
).":".$t ) ;
1371 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1372 $pPLC=$sk->postParseLinkColour();
1373 $sk->postParseLinkColour( false );
1374 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1375 $sk->postParseLinkColour( $pPLC );
1376 $wgLinkCache->resume();
1378 $sortkey = $wasblank ?
$this->mTitle
->getPrefixedText() : $text;
1379 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1380 $this->mOutput
->mCategoryLinks
[] = $t ;
1381 $s .= $prefix . $trail ;
1386 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
1387 ( strpos( $link, '#' ) == FALSE ) ) {
1388 # Self-links are handled specially; generally de-link and change to bold.
1389 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1393 if( $ns == NS_MEDIA
) {
1394 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1395 $wgLinkCache->addImageLinkObj( $nt );
1397 } elseif( $ns == NS_SPECIAL
) {
1398 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1401 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1403 wfProfileOut( $fname );
1407 # The following helper functions are used by doBlockLevels()
# Builds the closing tag for the block section named in $this->mLastSection
# (when one is open) and clears the mInPre / mLastSection state. Callers
# append the result to their output string.
1409 /* private */ function closeParagraph() {
1411 if ( '' != $this->mLastSection
) {
# e.g. mLastSection == 'p' gives "</p>\n"
1412 $result = '</' . $this->mLastSection
. ">\n";
1414 $this->mInPre
= false;
1415 $this->mLastSection
= '';
1418 # getCommon() returns the length of the longest common prefix of both
1419 # arguments, i.e. how many leading characters $st1 and $st2 share.
1421 /* private */ function getCommon( $st1, $st2 ) {
1422 $fl = strlen( $st1 );
1423 $shorter = strlen( $st2 );
# $shorter ends up as the smaller of the two lengths, bounding the scan.
1424 if ( $fl < $shorter ) { $shorter = $fl; }
# Walk both strings in step and stop at the first differing byte.
1426 for ( $i = 0; $i < $shorter; ++
$i ) {
# NOTE(review): $str{$i} offsets were deprecated in PHP 7.4 and removed in
# PHP 8.0; $str[$i] is the supported spelling.
1427 if ( $st1{$i} != $st2{$i} ) { break; }
1431 # These next three functions open, continue, and close the list
1432 # element appropriate to the prefix character passed into them.
#
# openList(): closes any open paragraph, then appends the opening tags for the
# list type selected by $char:
#   '*' => <ul><li>   '#' => <ol><li>   ':' => <dl><dd>   ';' => <dl><dt>
1434 /* private */ function openList( $char ) {
1435 $result = $this->closeParagraph();
1437 if ( '*' == $char ) { $result .= '<ul><li>'; }
1438 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1439 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1440 else if ( ';' == $char ) {
1441 $result .= '<dl><dt>';
# Remember that a <dt> is open; nextItem() and closeList() read this flag.
1442 $this->mDTopen
= true;
# Unknown prefix character. Note this ASSIGNS rather than appends, so the
# closeParagraph() output collected above is discarded on this path.
1444 else { $result = '<!-- ERR 1 -->'; }
# Returns the markup that ends the current list item and starts the next one
# at the same nesting level, for the list type selected by $char.
1449 /* private */ function nextItem( $char ) {
# Bulleted and numbered lists use the same item markup.
1450 if ( '*' == $char ||
'#' == $char ) { return '</li><li>'; }
1451 else if ( ':' == $char ||
';' == $char ) {
# Definition lists: if a term (<dt>) is currently open, that is what gets closed.
1453 if ( $this->mDTopen
) { $close = '</dt>'; }
1454 if ( ';' == $char ) {
# ';' starts a new term.
1455 $this->mDTopen
= true;
1456 return $close . '<dt>';
# Otherwise a definition (<dd>) follows and no term is open any more.
1458 $this->mDTopen
= false;
1459 return $close . '<dd>';
# Unknown prefix character.
1462 return '<!-- ERR 2 -->';
# Builds the closing markup for the list type selected by $char. No ';' branch
# is visible: doBlockLevels() calls this with characters taken from prefixes
# in which ';' has already been replaced by ':'.
1465 /* private */ function closeList( $char ) {
1466 if ( '*' == $char ) { $text = '</li></ul>'; }
1467 else if ( '#' == $char ) { $text = '</li></ol>'; }
1468 else if ( ':' == $char ) {
# A definition list ends with </dt> if a term is still open, else with </dd>.
1469 if ( $this->mDTopen
) {
1470 $this->mDTopen
= false;
1471 $text = '</dt></dl>';
1473 $text = '</dd></dl>';
# Unknown prefix character.
1476 else { return '<!-- ERR 3 -->'; }
1480 /* private */ function doBlockLevels( $text, $linestart ) {
1481 $fname = 'Parser::doBlockLevels';
1482 wfProfileIn( $fname );
1484 # Parsing through the text line by line. The main thing
1485 # happening here is handling of block-level elements p, pre,
1486 # and making lists from lines starting with * # : etc.
1488 $textLines = explode( "\n", $text );
1490 $lastPrefix = $output = $lastLine = '';
1491 $this->mDTopen
= $inBlockElem = false;
1493 $paragraphStack = false;
1495 if ( !$linestart ) {
1496 $output .= array_shift( $textLines );
1498 foreach ( $textLines as $oLine ) {
1499 $lastPrefixLength = strlen( $lastPrefix );
1500 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1501 $preOpenMatch = preg_match('/<pre/i', $oLine );
1502 if ( !$this->mInPre
) {
1503 # Multiple prefixes may abut each other for nested lists.
1504 $prefixLength = strspn( $oLine, '*#:;' );
1505 $pref = substr( $oLine, 0, $prefixLength );
1508 $pref2 = str_replace( ';', ':', $pref );
1509 $t = substr( $oLine, $prefixLength );
1510 $this->mInPre
= !empty($preOpenMatch);
1512 # Don't interpret any other prefixes in preformatted text
1514 $pref = $pref2 = '';
1519 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1520 # Same as the last item, so no need to deal with nesting or opening stuff
1521 $output .= $this->nextItem( substr( $pref, -1 ) );
1522 $paragraphStack = false;
1524 if ( substr( $pref, -1 ) == ';') {
1525 # The one nasty exception: definition lists work like this:
1526 # ; title : definition text
1527 # So we check for : in the remainder text to split up the
1528 # title and definition, without breaking links.
1529 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1530 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1532 $output .= $term . $this->nextItem( ':' );
1536 } elseif( $prefixLength ||
$lastPrefixLength ) {
1537 # Either open or close a level...
1538 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1539 $paragraphStack = false;
1541 while( $commonPrefixLength < $lastPrefixLength ) {
1542 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1543 --$lastPrefixLength;
1545 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1546 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1548 while ( $prefixLength > $commonPrefixLength ) {
1549 $char = substr( $pref, $commonPrefixLength, 1 );
1550 $output .= $this->openList( $char );
1552 if ( ';' == $char ) {
1553 # FIXME: This is dupe of code above
1554 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1556 $output .= $term . $this->nextItem( ":" );
1560 ++
$commonPrefixLength;
1562 $lastPrefix = $pref2;
1564 if( 0 == $prefixLength ) {
1565 # No prefix (not in list)--go to paragraph mode
1566 $uniq_prefix = UNIQ_PREFIX
;
1567 // XXX: use a stack for nestable elements like span, table and div
1568 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1569 $closematch = preg_match(
1570 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1571 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1572 if ( $openmatch or $closematch ) {
1573 $paragraphStack = false;
1574 $output .= $this->closeParagraph();
1575 if($preOpenMatch and !$preCloseMatch) {
1576 $this->mInPre
= true;
1578 if ( $closematch ) {
1579 $inBlockElem = false;
1581 $inBlockElem = true;
1583 } else if ( !$inBlockElem && !$this->mInPre
) {
1584 if ( " " == $t{0} and ( $this->mLastSection
== 'pre' or trim($t) != '' ) ) {
1586 if ($this->mLastSection
!= 'pre') {
1587 $paragraphStack = false;
1588 $output .= $this->closeParagraph().'<pre>';
1589 $this->mLastSection
= 'pre';
1593 if ( '' == trim($t) ) {
1594 if ( $paragraphStack ) {
1595 $output .= $paragraphStack.'<br />';
1596 $paragraphStack = false;
1597 $this->mLastSection
= 'p';
1599 if ($this->mLastSection
!= 'p' ) {
1600 $output .= $this->closeParagraph();
1601 $this->mLastSection
= '';
1602 $paragraphStack = '<p>';
1604 $paragraphStack = '</p><p>';
1608 if ( $paragraphStack ) {
1609 $output .= $paragraphStack;
1610 $paragraphStack = false;
1611 $this->mLastSection
= 'p';
1612 } else if ($this->mLastSection
!= 'p') {
1613 $output .= $this->closeParagraph().'<p>';
1614 $this->mLastSection
= 'p';
1620 if ($paragraphStack === false) {
1624 while ( $prefixLength ) {
1625 $output .= $this->closeList( $pref2{$prefixLength-1} );
1628 if ( '' != $this->mLastSection
) {
1629 $output .= '</' . $this->mLastSection
. '>';
1630 $this->mLastSection
= '';
1633 wfProfileOut( $fname );
1637 # Return value of a magic variable (like PAGENAME)
# $index is one of the MAG_* magic word identifiers. Date and time values are
# computed with date() / wfTimestampNow() at the moment of the call and are
# formatted through $wgLang.
1638 function getVariableValue( $index ) {
1639 global $wgLang, $wgSitename, $wgServer;
1642 case MAG_CURRENTMONTH
:
# 'm' is the two-digit month, passed through the language's number formatting.
1643 return $wgLang->formatNum( date( 'm' ) );
1644 case MAG_CURRENTMONTHNAME
:
1645 return $wgLang->getMonthName( date('n') );
1646 case MAG_CURRENTMONTHNAMEGEN
:
1647 return $wgLang->getMonthNameGen( date('n') );
1648 case MAG_CURRENTDAY
:
1649 return $wgLang->formatNum( date('j') );
# The next three returns derive from the title currently being parsed:
# its text, its partial URL, and the localised name of its namespace.
1651 return $this->mTitle
->getText();
1653 return $this->mTitle
->getPartialURL();
1655 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1656 return $wgLang->getNsText($this->mTitle
->getNamespace()); # Patch by Dori
# date('w') is 0 (Sunday) through 6 (Saturday); the +1 shifts it to 1..7
# before the weekday-name lookup.
1657 case MAG_CURRENTDAYNAME
:
1658 return $wgLang->getWeekdayName( date('w')+
1 );
1659 case MAG_CURRENTYEAR
:
1660 return $wgLang->formatNum( date( 'Y' ) );
1661 case MAG_CURRENTTIME
:
1662 return $wgLang->time( wfTimestampNow(), false );
1663 case MAG_NUMBEROFARTICLES
:
1664 return $wgLang->formatNum( wfNumberOfArticles() );
1674 # initialise the magic variables (like CURRENTMONTHNAME)
# Resets $this->mVariables, then for every id in $wgVariableIDs asks the
# matching MagicWord to add its entries to the array, paired with the value
# from getVariableValue(). Values are therefore computed once, at
# initialisation time.
1675 function initialiseVariables() {
1676 global $wgVariableIDs;
1677 $this->mVariables
= array();
1678 foreach ( $wgVariableIDs as $id ) {
1679 $mw =& MagicWord
::get( $id );
# mVariables is handed to addToArray() to be filled in; the key format is
# decided by MagicWord::addToArray().
1680 $mw->addToArray( $this->mVariables
, $this->getVariableValue( $id ) );
# Expands {{variable}}, {{{argument}}} and {{template|...}} constructs in
# $text by running three preg_replace_callback() passes.
#   $text - wiki text to process
#   $args - template arguments for this level; pushed on $this->mArgStack
#           for the duration of the call and popped afterwards
1684 /* private */ function replaceVariables( $text, $args = array() ) {
1685 global $wgLang, $wgScript, $wgArticlePath;
1687 # Prevent too big inclusions
1688 if(strlen($text)> MAX_INCLUDE_SIZE
)
1691 $fname = 'Parser::replaceVariables';
1692 wfProfileIn( $fname );
# Character class bodies for the patterns below: the characters legal in a
# title, and the same set with '{' and '}' removed.
1695 $titleChars = Title
::legalChars();
1696 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1698 # This function is called recursively. To keep track of arguments we need a stack:
1699 array_push( $this->mArgStack
, $args );
1701 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
# NOTE(review): presumably this lets the global wf*Substitution callbacks named
# below reach this parser instance -- confirm against their definitions.
1702 $GLOBALS['wgCurParser'] =& $this;
1705 if ( $this->mOutputType
== OT_HTML
) {
1706 # Variable substitution
# Capture group 1: the text between the double braces.
1707 $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1709 # Argument substitution
# Capture group 1: optional preceding newline; group 2: the argument name.
1710 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1712 # Template substitution
# Groups: 1 = optional preceding newline, 2 = text before the first '|',
# 3 = the rest starting with '|' (or empty). The /s flag lets '.' match newlines.
1713 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1714 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
# Balance the array_push() above.
1716 array_pop( $this->mArgStack
);
1718 wfProfileOut( $fname );
# Regex callback body: looks up $matches[1] in $this->mVariables. On a hit the
# stored value is used and the output is flagged as containing old-style
# magic; otherwise the whole match ($matches[0]) is kept unchanged.
1722 function variableSubstitution( $matches ) {
# Build the variable table lazily on first use.
1723 if ( !$this->mVariables
) {
1724 $this->initialiseVariables();
1726 if ( array_key_exists( $matches[1], $this->mVariables
) ) {
1727 $text = $this->mVariables
[$matches[1]];
1728 $this->mOutput
->mContainsOldMagic
= true;
# Not a known variable: leave the original text in place.
1730 $text = $matches[0];
1735 # Split template arguments
# $argsString is either '' or a string whose first character is dropped
# before the remainder is split on '|'.
1736 function getTemplateArgs( $argsString ) {
1737 if ( $argsString === '' ) {
1741 $args = explode( '|', substr( $argsString, 1 ) );
1743 # If any of the arguments contains a '[[' but no ']]', it needs to be
1744 # merged with the next arg because the '|' character between belongs
1745 # to the link syntax and not the template parameter syntax.
1746 $argc = count($args);
# The actual test is slightly broader than the comment above: an argument is
# merged with its successor whenever its '[[' and ']]' counts differ.
1748 for ( $i = 0; $i < $argc-1; $i++
) {
1749 if ( substr_count ( $args[$i], "[[" ) != substr_count ( $args[$i], "]]" ) ) {
# Re-join with the '|' that explode() consumed, then drop the merged element.
1750 $args[$i] .= "|".$args[$i+
1];
1751 array_splice($args, $i+
1, 1);
1760 function braceSubstitution( $matches ) {
1761 global $wgLinkCache, $wgLang;
1762 $fname = 'Parser::braceSubstitution';
1769 # $newline is an optional newline character before the braces
1770 # $part1 is the bit before the first |, and must contain only title characters
1771 # $args is a list of arguments, starting from index 0, not including $part1
1773 $newline = $matches[1];
1774 $part1 = $matches[2];
1775 # If the third subpattern matched anything, it will start with |
1777 $args = $this->getTemplateArgs($matches[3]);
1778 $argc = count( $args );
1781 if ( strpos( $matches[0], '{{{' ) !== false ) {
1782 $text = $matches[0];
1789 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1790 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1791 if ( $this->mOutputType
!= OT_WIKI
) {
1792 # Invalid SUBST not replaced at PST time
1793 # Return without further processing
1794 $text = $matches[0];
1798 } elseif ( $this->mOutputType
== OT_WIKI
) {
1799 # SUBST not found in PST pass, do nothing
1800 $text = $matches[0];
1805 # MSG, MSGNW and INT
1808 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1809 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1812 # Remove obsolete MSG:
1813 $mwMsg =& MagicWord
::get( MAG_MSG
);
1814 $mwMsg->matchStartAndRemove( $part1 );
1817 # Check if it is an internal message
1818 $mwInt =& MagicWord
::get( MAG_INT
);
1819 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1820 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1821 $text = wfMsgReal( $part1, $args, true );
1829 # Check for NS: (namespace expansion)
1830 $mwNs = MagicWord
::get( MAG_NS
);
1831 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1832 if ( intval( $part1 ) ) {
1833 $text = $wgLang->getNsText( intval( $part1 ) );
1836 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1837 if ( !is_null( $index ) ) {
1838 $text = $wgLang->getNsText( $index );
1845 # LOCALURL and LOCALURLE
1847 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1848 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
1850 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1851 $func = 'getLocalURL';
1852 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1853 $func = 'escapeLocalURL';
1858 if ( $func !== '' ) {
1859 $title = Title
::newFromText( $part1 );
1860 if ( !is_null( $title ) ) {
1862 $text = $title->$func( $args[0] );
1864 $text = $title->$func();
1871 # Internal variables
1872 if ( !$this->mVariables
) {
1873 $this->initialiseVariables();
1875 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1876 $text = $this->mVariables
[$part1];
1878 $this->mOutput
->mContainsOldMagic
= true;
1881 # Template table test
1883 # Did we encounter this template already? If yes, it is in the cache
1884 # and we need to check for loops.
1885 if ( isset( $this->mTemplates
[$part1] ) ) {
1886 # Infinite loop test
1887 if ( isset( $this->mTemplatePath
[$part1] ) ) {
1891 # set $text to cached message.
1892 $text = $this->mTemplates
[$part1];
1896 # Load from database
1898 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1899 if ( !is_null( $title ) && !$title->isExternal() ) {
1900 # Check for excessive inclusion
1901 $dbk = $title->getPrefixedDBkey();
1902 if ( $this->incrementIncludeCount( $dbk ) ) {
1903 # This should never be reached.
1904 $article = new Article( $title );
1905 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1906 if ( $articleContent !== false ) {
1908 $text = $articleContent;
1912 # If the title is valid but undisplayable, make a link to it
1913 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1914 $text = '[['.$title->getPrefixedText().']]';
1918 # Template cache array insertion
1919 $this->mTemplates
[$part1] = $text;
1923 # Recursive parsing, escaping and link table handling
1924 # Only for HTML output
1925 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1926 $text = wfEscapeWikiText( $text );
1927 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1928 # Clean up argument array
1929 $assocArgs = array();
1931 foreach( $args as $arg ) {
1932 $eqpos = strpos( $arg, '=' );
1933 if ( $eqpos === false ) {
1934 $assocArgs[$index++
] = $arg;
1936 $name = trim( substr( $arg, 0, $eqpos ) );
1937 $value = trim( substr( $arg, $eqpos+
1 ) );
1938 if ( $value === false ) {
1941 if ( $name !== false ) {
1942 $assocArgs[$name] = $value;
1947 # Do not enter included links in link table
1948 if ( !is_null( $title ) ) {
1949 $wgLinkCache->suspend();
1952 # Add a new element to the template recursion path
1953 $this->mTemplatePath
[$part1] = 1;
1955 $text = $this->stripParse( $text, $newline, $assocArgs );
1957 # Resume the link cache and register the inclusion as a link
1958 if ( !is_null( $title ) ) {
1959 $wgLinkCache->resume();
1960 $wgLinkCache->addLinkObj( $title );
1963 # Empties the template path
1964 $this->mTemplatePath
= array();
1973 # Triple brace replacement -- used for template arguments
# Regex callback body: $matches[1] is the optional newline before the braces,
# $matches[2] the argument name, $matches[0] the whole match.
1974 function argSubstitution( $matches ) {
1975 $newline = $matches[1];
1976 $arg = trim( $matches[2] );
# Default: leave the {{{...}}} text unchanged.
1977 $text = $matches[0];
# Arguments of the innermost replaceVariables() call currently running.
1978 $inputArgs = end( $this->mArgStack
);
1980 if ( array_key_exists( $arg, $inputArgs ) ) {
# The argument value is itself parsed, with an empty argument list of its own.
1981 $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1987 # Returns true if the function is allowed to include this entity
# Keeps a per-key counter in $this->mIncludeCount. $dbk is the key being
# counted; callers in this file pass a prefixed DB key or an 'int:'-prefixed
# message name. The counter is incremented on every call, and the inclusion is
# permitted while the new count is at most MAX_INCLUDE_REPEAT.
1988 function incrementIncludeCount( $dbk ) {
# First sighting of this key: start counting from zero.
1989 if ( !array_key_exists( $dbk, $this->mIncludeCount
) ) {
1990 $this->mIncludeCount
[$dbk] = 0;
1992 if ( ++
$this->mIncludeCount
[$dbk] <= MAX_INCLUDE_REPEAT
) {
2000 # Cleans up HTML, removes dangerous tags and attributes
2001 /* private */ function removeHTMLtags( $text ) {
2002 global $wgUseTidy, $wgUserHtml;
2003 $fname = 'Parser::removeHTMLtags';
2004 wfProfileIn( $fname );
2007 $htmlpairs = array( # Tags that must be closed
2008 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
2009 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
2010 'strike', 'strong', 'tt', 'var', 'div', 'center',
2011 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
2012 'ruby', 'rt' , 'rb' , 'rp', 'p'
2014 $htmlsingle = array(
2015 'br', 'hr', 'li', 'dt', 'dd'
2017 $htmlnest = array( # Tags that can be nested--??
2018 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2019 'dl', 'font', 'big', 'small', 'sub', 'sup'
2021 $tabletags = array( # Can only appear inside table
2025 $htmlpairs = array();
2026 $htmlsingle = array();
2027 $htmlnest = array();
2028 $tabletags = array();
2031 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2032 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2034 $htmlattrs = $this->getHTMLattrs () ;
2036 # Remove HTML comments
2037 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
2039 $bits = explode( '<', $text );
2040 $text = array_shift( $bits );
2042 $tagstack = array(); $tablestack = array();
2043 foreach ( $bits as $x ) {
2044 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
2045 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2047 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2048 error_reporting( $prev );
2051 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2055 if ( ! in_array( $t, $htmlsingle ) &&
2056 ( $ot = @array_pop
( $tagstack ) ) != $t ) {
2057 @array_push
( $tagstack, $ot );
2060 if ( $t == 'table' ) {
2061 $tagstack = array_pop( $tablestack );
2066 # Keep track for later
2067 if ( in_array( $t, $tabletags ) &&
2068 ! in_array( 'table', $tagstack ) ) {
2070 } else if ( in_array( $t, $tagstack ) &&
2071 ! in_array ( $t , $htmlnest ) ) {
2073 } else if ( ! in_array( $t, $htmlsingle ) ) {
2074 if ( $t == 'table' ) {
2075 array_push( $tablestack, $tagstack );
2076 $tagstack = array();
2078 array_push( $tagstack, $t );
2080 # Strip non-approved attributes from the tag
2081 $newparams = $this->fixTagAttributes($params);
2085 $rest = str_replace( '>', '>', $rest );
2086 $text .= "<$slash$t $newparams$brace$rest";
2090 $text .= '<' . str_replace( '>', '>', $x);
2092 # Close off any remaining tags
2093 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2095 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2098 # this might be possible using tidy itself
2099 foreach ( $bits as $x ) {
2100 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2102 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2103 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2104 $newparams = $this->fixTagAttributes($params);
2105 $rest = str_replace( '>', '>', $rest );
2106 $text .= "<$slash$t $newparams$brace$rest";
2108 $text .= '<' . str_replace( '>', '>', $x);
2112 wfProfileOut( $fname );
2117 # This function accomplishes several tasks:
2118 # 1) Auto-number headings if that option is enabled
2119 # 2) Add an [edit] link to sections for logged in users who have enabled the option
2120 # 3) Add a Table of contents on the top for users who have enabled the option
2121 # 4) Auto-anchor headings
2123 # It loops through all headlines, collects the necessary data, then splits up the
2124 # string and re-inserts the newly formatted headlines.
2125 /* private */ function formatHeadings( $text, $isMain=true ) {
2126 global $wgInputEncoding, $wgMaxTocLevel;
2128 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
2129 $doShowToc = $this->mOptions
->getShowToc();
2130 $forceTocHere = false;
2131 if( !$this->mTitle
->userCanEdit() ) {
2133 $rightClickHack = 0;
2135 $showEditLink = $this->mOptions
->getEditSection();
2136 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
2139 # Inhibit editsection links if requested in the page
2140 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
2141 if( $esw->matchAndRemove( $text ) ) {
2144 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2146 $mw =& MagicWord
::get( MAG_NOTOC
);
2147 if( $mw->matchAndRemove( $text ) ) {
2151 # never add the TOC to the Main Page. This is an entry page that should not
2152 # be more than 1-2 screens large anyway
2153 if( $this->mTitle
->getPrefixedText() == wfMsg('mainpage') ) {
2157 # Get all headlines for numbering them and adding funky stuff like [edit]
2158 # links - this is for later, but we need the number of headlines right now
2159 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2161 # if there are fewer than 4 headlines in the article, do not show TOC
2162 if( $numMatches < 4 ) {
2166 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2167 # override above conditions and always show TOC at that place
2168 $mw =& MagicWord
::get( MAG_TOC
);
2169 if ($mw->match( $text ) ) {
2171 $forceTocHere = true;
2173 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2174 # override above conditions and always show TOC above first header
2175 $mw =& MagicWord
::get( MAG_FORCETOC
);
2176 if ($mw->matchAndRemove( $text ) ) {
2183 # We need this to perform operations on the HTML
2184 $sk =& $this->mOptions
->getSkin();
2189 # Ugh .. the TOC should have neat indentation levels which can be
2190 # passed to the skin functions. These are determined here
2195 $sublevelCount = array();
2198 foreach( $matches[3] as $headline ) {
2201 $prevlevel = $level;
2203 $level = $matches[1][$headlineCount];
2204 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
2205 # reset when we enter a new level
2206 $sublevelCount[$level] = 0;
2207 $toc .= $sk->tocIndent( $level - $prevlevel );
2208 $toclevel +
= $level - $prevlevel;
2210 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
2211 # reset when we step back a level
2212 $sublevelCount[$level+
1]=0;
2213 $toc .= $sk->tocUnindent( $prevlevel - $level );
2214 $toclevel -= $prevlevel - $level;
2216 # count number of headlines for each level
2217 @$sublevelCount[$level]++
;
2218 if( $doNumberHeadings ||
$doShowToc ) {
2220 for( $i = 1; $i <= $level; $i++
) {
2221 if( !empty( $sublevelCount[$i] ) ) {
2225 $numbering .= $sublevelCount[$i];
2231 # The canonized header is a version of the header text safe to use for links
2232 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2233 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
2234 $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState
);
2237 $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2238 $tocline = trim( $canonized_headline );
2239 $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT
, $wgInputEncoding ) );
2240 $replacearray = array(
2244 $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2245 $refer[$headlineCount] = $canonized_headline;
2247 # count how many in assoc. array so we can track dupes in anchors
2248 @$refers[$canonized_headline]++
;
2249 $refcount[$headlineCount]=$refers[$canonized_headline];
2251 # Prepend the number to the heading text
2253 if( $doNumberHeadings ||
$doShowToc ) {
2254 $tocline = $numbering . ' ' . $tocline;
2256 # Don't number the heading if it is the only one (looks silly)
2257 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2258 # the two are different if the line contains a link
2259 $headline=$numbering . ' ' . $headline;
2263 # Create the anchor for linking from the TOC to the section
2264 $anchor = $canonized_headline;
2265 if($refcount[$headlineCount] > 1 ) {
2266 $anchor .= '_' . $refcount[$headlineCount];
2268 if( $doShowToc && ( !isset($wgMaxTocLevel) ||
$toclevel<$wgMaxTocLevel ) ) {
2269 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2271 if( $showEditLink ) {
2272 if ( empty( $head[$headlineCount] ) ) {
2273 $head[$headlineCount] = '';
2275 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
2278 # Add the edit section span
2279 if( $rightClickHack ) {
2280 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
2283 # give headline the correct <h#> tag
2284 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
2290 $toclines = $headlineCount;
2291 $toc .= $sk->tocUnindent( $toclevel );
2292 $toc = $sk->tocTable( $toc );
2295 # split up and insert constructed headlines
2297 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2300 foreach( $blocks as $block ) {
2301 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2302 # This is the [edit] link that appears for the top block of text when
2303 # section editing is enabled
2305 # Disabled because it broke block formatting
2306 # For example, a bullet point in the top line
2307 # $full .= $sk->editSectionLink(0);
2310 if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2311 # Top anchor now in skin
2315 if( !empty( $head[$i] ) ) {
2321 $mw =& MagicWord
::get( MAG_TOC
);
2322 return $mw->replace( $toc, $full );
# Return an HTML link for the "ISBN 123456" text
#
# Splits $text on the literal marker "ISBN ". Every marker that is followed
# (after optional spaces) by at least one character of the ISBN alphabet
# (digits, '-', upper-case letters) is replaced by a link to the
# "Booksources" special page; markers without a number are emitted unchanged.
#
# $text   - text to scan
# returns - the text with ISBN references turned into links
/* private */ function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# A space is prepended so that a marker at the very start of $text still
	# yields a leading chunk; it is stripped again below. explode() replaces
	# the regex-based split(): the separator has no regex metacharacters, so
	# the result is the same, and split() no longer exists in PHP 7+.
	$a = explode( 'ISBN ', " $text" );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# strspn() measures the leading run of accepted characters and, unlike
		# indexing $x{0} in a loop, is well defined when $x is (or becomes)
		# empty, e.g. when the text ends right after the number.
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		# The query value is the number without separators
		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( $num === '' ) {
			# Not followed by a number: restore the marker untouched
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
# Return an HTML link for the "GEO ..." text
#
# First rewrites coordinates written as  D°M'S" North|South, D°M'S" East|West
# into the compact "(GEO <lat>:<long>)" notation, then turns every
# "GEO <coordinates>" occurrence that contains at least one ':' into a link
# to the "Geo" special page. Other occurrences are emitted unchanged.
#
# $text   - text to scan
# returns - the text with GEO references turned into links
/* private */ function magicGEO( $text ) {
	global $wgLang, $wgUseGeoMode;
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# These next lines are only for the ~35000 U.S. Census Rambot pages...
	$directions = array ( "N" => "North" , "S" => "South" , "E" => "East" , "W" => "West" ) ;
	# Sign convention of the compact notation: North/East positive,
	# South/West negative. (South used to be written with '+', which made
	# southern latitudes link to the northern hemisphere.)
	$signs = array ( "N" => "+" , "S" => "-" , "E" => "+" , "W" => "-" ) ;
	foreach ( array ( "N" , "S" ) as $ns ) {
		foreach ( array ( "W" , "E" ) as $ew ) {
			$text = preg_replace (
				"/(\d+)°(\d+)'(\d+)\" {$directions[$ns]}, (\d+)°(\d+)'(\d+)\" {$directions[$ew]}/" ,
				"(GEO {$signs[$ns]}\$1.\$2.\$3:{$signs[$ew]}\$4.\$5.\$6)" ,
				$text ) ;
		}
	}

	# A space is prepended so that a marker at the very start of $text still
	# yields a leading chunk; it is stripped again below. explode() replaces
	# the regex-based split() (no metacharacters in the separator).
	$a = explode( 'GEO ', " $text" );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# strspn() is safe on an empty remainder, unlike indexing $x{0}
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		# '+' is dropped from the query value; '-' is kept
		$num = str_replace( array( '+', ' ' ), '', $geo );

		if ( $num === '' || count ( explode ( ":" , $num , 3 ) ) < 2 ) {
			# No usable lat:long pair: restore the marker untouched
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "coordinates={$num}" ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
# Return an HTML link for the "RFC 1234" text
#
# Every "RFC " marker that is followed (after optional spaces) by digits is
# replaced by an external link whose URL is the 'rfcurl' message with $1
# substituted by the number. Markers without a number are emitted unchanged.
#
# $text   - text to scan
# returns - the text with RFC references turned into links
/* private */ function magicRFC( $text ) {
	# A space is prepended so that a marker at the very start of $text still
	# yields a leading chunk; it is stripped again below. explode() replaces
	# the regex-based split() (no metacharacters in the separator).
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# strspn() is safe on an empty remainder, unlike indexing $x{0}
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc === '' ) {
			# Not followed by a number: restore the marker untouched
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
# Transform wikitext with the parser in OT_WIKI output mode.
#
# Stores $options in mOptions and a reference to $title in mTitle, optionally
# resets the parser state, runs a str_replace() pass and a preg_replace()
# pass driven by $pairs, and then sends the text through
# strip() -> pstPass2() -> unstrip() -> unstripNoWiki().
#
# $text       - wikitext to transform
# $title      - kept by reference in mTitle
# $user       - handed on to pstPass2()
# $options    - kept in mOptions
# $clearState - when true (the default) clearState() is called first
#
# NOTE(review): the statements that build $pairs and the final return are not
# visible in this excerpt. Presumably the transformed $text is returned;
# confirm, and confirm that both replacement passes are live code.
2450 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2451 $this->mOptions
= $options;
2452 $this->mTitle
=& $title;
2453 $this->mOutputType
= OT_WIKI
;
2455 if ( $clearState ) {
2456 $this->clearState();
# $stripState is filled in by strip() and consumed by the two unstrip calls
2459 $stripState = false;
2463 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2467 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2468 "/<br *?>/i" => "<br />",
2470 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2472 $text = $this->strip( $text, $stripState, false );
2473 $text = $this->pstPass2( $text, $user );
2474 $text = $this->unstrip( $text, $stripState );
2475 $text = $this->unstripNoWiki( $text, $stripState );
# Second pass of the pre-save transform.
#
# Expands {{subst:...}} style tags, replaces the ~~~ / ~~~~ / ~~~~~ signature
# shortcuts, completes "context" links such as [[|name]] and
# [[name (context)|]], and trims trailing whitespace.
#
# $text   - wikitext to transform
# $user   - object providing getName() and getOption( 'nickname' )
# returns - the transformed wikitext
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	$useLocalTz = isset( $wgLocaltimezone );
	if ( $useLocalTz ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( $useLocalTz ) {
		# Restore the previous TZ. This used to read the misspelt, undefined
		# variable $oldtzs, so TZ was reset to an empty value instead.
		putenv( 'TZ=' . $oldtz );
	}

	# The tilde runs are literal, so plain string replacement is enough. It
	# also keeps "$1" or "\1" inside a user name or nickname from being
	# interpreted as a back-reference, which preg_replace() would do.
	# Longest run first, so ~~~~~ is not consumed by the shorter forms.
	$userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	                                                  # [[ns:page (cont)|]]

	# Context of the current page: the parenthesised suffix of its title, or
	# the empty string when the title has none (keeps $context defined).
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# NOTE(review): replaceVariables() above is already described as handling
	# {{subst:...}}; confirm this additional substitution pass is still wanted.
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Prime the members that parse() would normally initialise, so that code
# outside the class can call individual parser methods directly.
#
# $title      - kept by reference in mTitle
# $options    - kept in mOptions
# $outputType - kept in mOutputType
# $clearState - when true (the default) the parser state is reset as well
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
# Expand variables in a message text with the parser in OT_MSG output mode.
#
# Sets mTitle from $wgTitle, stores $options in mOptions, resets the parser
# state with clearState() and runs replaceVariables() over $text.
#
# $text    - message text to transform
# $options - kept in mOptions
#
# NOTE(review): a static $executing flag is declared as a recursion guard, but
# the statements that test and set it, and the return, are not visible in this
# excerpt. Presumably the transformed $text is returned; confirm.
2558 function transformMsg( $text, $options ) {
2560 static $executing = false;
2562 # Guard against infinite recursion
2568 $this->mTitle
= $wgTitle;
2569 $this->mOptions
= $options;
2570 $this->mOutputType
= OT_MSG
;
2571 $this->clearState();
2572 $text = $this->replaceVariables( $text );
# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
# Callback will be called with the text within
# Transform and return the text within
#
# $tag      - tag name to register
# $callback - callback stored for that tag in mTagHooks
# returns   - the callback previously registered for $tag, or null if none
function setHook( $tag, $callback ) {
	# An explicit isset() check replaces the @ operator, which would also
	# have hidden unrelated errors raised while reading the old value.
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $oldVal;
}
2590 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
2591 var $mCacheTime; # Used in ParserCache
# Constructor: store the rendered text and its link metadata.
# The cache timestamp always starts out as an empty string.
function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
	$containsOldMagic = false )
{
	$this->mCacheTime = "";
	$this->mContainsOldMagic = $containsOldMagic;
	$this->mCategoryLinks = $categoryLinks;
	$this->mLanguageLinks = $languageLinks;
	$this->mText = $text;
}
# --- Read accessors: each returns the corresponding member unchanged ---

function getText() {
	return $this->mText;
}

function getLanguageLinks() {
	return $this->mLanguageLinks;
}

function getCategoryLinks() {
	return $this->mCategoryLinks;
}

function getCacheTime() {
	return $this->mCacheTime;
}

function containsOldMagic() {
	return $this->mContainsOldMagic;
}

# --- Write accessors: each hands the member and the new value to
# --- wfSetVar() and returns whatever wfSetVar() returns

function setText( $text ) {
	return wfSetVar( $this->mText, $text );
}

function setLanguageLinks( $ll ) {
	return wfSetVar( $this->mLanguageLinks, $ll );
}

function setCategoryLinks( $cl ) {
	return wfSetVar( $this->mCategoryLinks, $cl );
}

function setContainsOldMagic( $com ) {
	return wfSetVar( $this->mContainsOldMagic, $com );
}

function setCacheTime( $t ) {
	return wfSetVar( $this->mCacheTime, $t );
}
# Fold the link lists and the old-magic flag of another output object into
# this one.
#
# $other - object exposing mLanguageLinks, mCategoryLinks and
#          mContainsOldMagic; it is only read, never modified
function merge( $other ) {
	$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
	# Category links must come from $other. Merging $this->mLanguageLinks
	# here (as the code used to) dropped $other's categories and copied
	# language links into the category list.
	$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
	$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
}
2624 # All variables are private
2625 var $mUseTeX; # Use texvc to expand <math> tags
2626 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
2627 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2628 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2629 var $mAllowExternalImages; # Allow external images inline
2630 var $mSkin; # Reference to the preferred skin
2631 var $mDateFormat; # Date format index
2632 var $mEditSection; # Create "edit section" links
2633 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2634 var $mNumberHeadings; # Automatically number headings
2635 var $mShowToc; # Show table of contents
# --- Read accessors: each returns the corresponding option member ---

function getUseTeX() {
	return $this->mUseTeX;
}

function getUseCategoryMagic() {
	return $this->mUseCategoryMagic;
}

function getUseDynamicDates() {
	return $this->mUseDynamicDates;
}

function getInterwikiMagic() {
	return $this->mInterwikiMagic;
}

function getAllowExternalImages() {
	return $this->mAllowExternalImages;
}

function getSkin() {
	return $this->mSkin;
}

function getDateFormat() {
	return $this->mDateFormat;
}

function getEditSection() {
	return $this->mEditSection;
}

function getEditSectionOnRightClick() {
	return $this->mEditSectionOnRightClick;
}

function getNumberHeadings() {
	return $this->mNumberHeadings;
}

function getShowToc() {
	return $this->mShowToc;
}
# --- Write accessors: each hands the option member and the new value to
# --- wfSetVar() and returns whatever wfSetVar() returns

function setUseTeX( $x ) {
	return wfSetVar( $this->mUseTeX, $x );
}

function setUseCategoryMagic( $x ) {
	return wfSetVar( $this->mUseCategoryMagic, $x );
}

function setUseDynamicDates( $x ) {
	return wfSetVar( $this->mUseDynamicDates, $x );
}

function setInterwikiMagic( $x ) {
	return wfSetVar( $this->mInterwikiMagic, $x );
}

function setAllowExternalImages( $x ) {
	return wfSetVar( $this->mAllowExternalImages, $x );
}

function setDateFormat( $x ) {
	return wfSetVar( $this->mDateFormat, $x );
}

function setEditSection( $x ) {
	return wfSetVar( $this->mEditSection, $x );
}

function setEditSectionOnRightClick( $x ) {
	return wfSetVar( $this->mEditSectionOnRightClick, $x );
}

function setNumberHeadings( $x ) {
	return wfSetVar( $this->mNumberHeadings, $x );
}

function setShowToc( $x ) {
	return wfSetVar( $this->mShowToc, $x );
}

# The skin is bound by reference and nothing is returned
function setSkin( &$x ) {
	$this->mSkin =& $x;
}
2662 # Get parser options
# Creates a fresh ParserOptions object and initialises it from $user via
# initialiseFromUser().
#
# $user - passed by reference straight to initialiseFromUser()
#
# NOTE(review): the return statement is not visible in this excerpt;
# presumably the new object ($popts) is returned. Confirm.
2663 /* static */ function newFromUser( &$user ) {
2664 $popts = new ParserOptions
;
2665 $popts->initialiseFromUser( $user );
# Fill every option member of this object.
#
# The site-wide switches (TeX, category magic, dynamic dates, interwiki
# magic, external images) are copied from the $wg* globals; the skin and the
# remaining options are read from a user object through getSkin() and
# getOption().
#
# $userInput - user object to read from; when it evaluates to false a
#              different $user is used instead (see note below)
#
# NOTE(review): the statement that creates $user in the "no user given"
# branch is not visible in this excerpt. Presumably a fresh user object is
# created before setLoaded( true ) is called on it; confirm.
2670 function initialiseFromUser( &$userInput ) {
2671 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2673 if ( !$userInput ) {
2675 $user->setLoaded( true );
2677 $user =& $userInput;
# Site-wide settings
2680 $this->mUseTeX
= $wgUseTeX;
2681 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
2682 $this->mUseDynamicDates
= $wgUseDynamicDates;
2683 $this->mInterwikiMagic
= $wgInterwikiMagic;
2684 $this->mAllowExternalImages
= $wgAllowExternalImages;
# Per-user settings
2685 $this->mSkin
=& $user->getSkin();
2686 $this->mDateFormat
= $user->getOption( 'date' );
2687 $this->mEditSection
= $user->getOption( 'editsection' );
2688 $this->mEditSectionOnRightClick
= $user->getOption( 'editsectiononrightclick' );
2689 $this->mNumberHeadings
= $user->getOption( 'numberheadings' );
2690 $this->mShowToc
= $user->getOption( 'showtoc' );
# Regex callbacks, used in Parser::replaceVariables
#
# Each one forwards the match array to the parser object currently stored in
# the global $wgCurParser and returns that method's result.
function wfBraceSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
# Callback: forwards the match array to argSubstitution() on the parser
# object stored in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
# Callback: forwards the match array to variableSubstitution() on the parser
# object stored in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2712 # Return the total number of articles
# Returns the value of the global $wgNumberOfArticles.
# NOTE(review): a line between the global declaration and the return is not
# visible in this excerpt; presumably the statistics are loaded first (see
# wfLoadSiteStats). Confirm.
2713 function wfNumberOfArticles() {
2714 global $wgNumberOfArticles;
2717 return $wgNumberOfArticles;
2720 # Get various statistics from the database
# Reads ss_total_views, ss_total_edits and ss_good_articles from the
# site_stats row with ss_row_id = 1 and stores them in the globals
# $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles.
# Takes no parameters.
2721 /* private */ function wfLoadSiteStats() {
2722 global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
2723 $fname = 'wfLoadSiteStats';
# Nothing to do unless the article count still holds -1
# (presumably the "not loaded yet" marker; confirm where it is initialised)
2725 if ( -1 != $wgNumberOfArticles ) return;
2726 $dbr =& wfGetDB( DB_SLAVE
);
2727 $s = $dbr->getArray( 'site_stats',
2728 array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
2729 array( 'ss_row_id' => 1 ), $fname
# NOTE(review): the body of the "$s === false" branch is not visible in this
# excerpt; presumably the globals are left untouched in that case. Confirm.
2732 if ( $s === false ) {
2735 $wgTotalViews = $s->ss_total_views
;
2736 $wgTotalEdits = $s->ss_total_edits
;
2737 $wgNumberOfArticles = $s->ss_good_articles
;
2741 function wfEscapeHTMLTagsOnly( $in ) {
2743 array( '"', '>', '<' ),
2744 array( '"', '>', '<' ),