aeed58cc24045c85d08b4242c8d467220a730687
3 // require_once('Tokenizer.php');
7 # Processes wiki markup
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
21 # * only within ParserOptions
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
# Inclusion limits
define( 'MAX_INCLUDE_REPEAT', 20 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# String parameter for extractTags which will cause it to strip HTML
# comments in addition to regular <XML>-style tags. This should not be
# anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix for escaping, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29' );

# Constants needed for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
# The complement of the class above: bracket, space, or control characters
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Everything except a closing bracket or control characters (spaces allowed)
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed link. Captures: 1 = whole URL, 2 = protocol, 3 = link text.
define( 'EXT_LINK_BRACKETED',
	'/\[((' . URL_PROTOCOLS . '):' . EXT_LINK_URL_CLASS . '+)'
	. ' *(' . EXT_LINK_TEXT_CLASS . '*?)\]/S' );
# External image URL. The protocol alternation sits in a non-capturing
# group so that the ':' is required after every protocol, not just after
# the last alternative; the capture numbering (1-4) is unchanged.
define( 'EXT_IMAGE_REGEX',
	'/^((?:' . HTTP_PROTOCOLS . '):)' .  # Protocol
	'(' . EXT_LINK_URL_CLASS . '+)\\/' .  # Hostname and path
	'(' . EXT_IMAGE_FNAME_CLASS . '+)\\.((?i)' . EXT_IMAGE_EXTENSIONS . ')$/S' # Filename
);
74 # Cleared with clearState():
75 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
76 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
79 var $mOptions, $mTitle, $mOutputType,
80 $mTemplates, // cache of already loaded templates, avoids
81 // multiple SQL queries for the same string
82 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
83 // in this path. Used for loop detection.
86 $this->mTemplates
= array();
87 $this->mTemplatePath
= array();
88 $this->mTagHooks
= array();
# Reset the per-parse member variables to their initial values.
function clearState() {
	# Output container for the next parse
	$this->mOutput = new ParserOutput();

	# Counters and flags
	$this->mAutonumber = 0;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mLastSection = "";

	# Collections start out empty
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
104 # First pass--just handle <nowiki> sections, pass the rest off
105 # to internalParse() which does all the real work.
107 # Returns a ParserOutput
109 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
111 $fname = "Parser::parse";
112 wfProfileIn( $fname );
118 $this->mOptions
= $options;
119 $this->mTitle
=& $title;
120 $this->mOutputType
= OT_HTML
;
123 $text = $this->strip( $text, $this->mStripState
);
124 $text = $this->internalParse( $text, $linestart );
125 $text = $this->unstrip( $text, $this->mStripState
);
126 # Clean up special characters, only run once, next-to-last before doBlockLevels
129 # french spaces, last one Guillemet-left
130 # only if there is something before the space
131 '/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1 \\2',
132 # french spaces, Guillemet-right
133 "/(\\302\\253) /i"=>"\\1 ",
134 '/<hr *>/i' => '<hr />',
135 '/<br *>/i' => '<br />',
136 '/<center *>/i' => '<div class="center">',
137 '/<\\/center *>/i' => '</div>',
138 # Clean up spare ampersands; note that we probably ought to be
139 # more careful about named entities.
140 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
142 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
145 # french spaces, last one Guillemet-left
146 '/ (\\?|:|;|!|\\302\\273)/i' => ' \\1',
147 # french spaces, Guillemet-right
148 '/(\\302\\253) /i' => '\\1 ',
149 '/([^> ]+(0(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
150 '/<center *>/i' => '<div class="center">',
151 '/<\\/center *>/i' => '</div>'
153 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
156 $text = $this->doBlockLevels( $text, $linestart );
157 $text = $this->unstripNoWiki( $text, $this->mStripState
);
159 $text = $this->tidy($text);
161 $this->mOutput
->setText( $text );
162 wfProfileOut( $fname );
163 return $this->mOutput
;
# Return a random lowercase hexadecimal string: two mt_rand() draws in
# the range 0..0x7fffffff, each converted with dechex() and concatenated
# without padding.
/* static */ function getRandomString() {
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
170 # Replaces all occurrences of <$tag>content</$tag> in the text
171 # with a random marker and returns the new text. The output parameter
172 # $content will be an associative array filled with data of the form
173 # $unique_marker => content.
175 # If $content is already set, the additional entries will be appended
177 # If $tag is set to STRIP_COMMENTS, the function will extract
178 # <!-- HTML comments -->
180 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
181 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
188 while ( '' != $text ) {
189 if($tag==STRIP_COMMENTS
) {
190 $p = preg_split( '/<!--/i', $text, 2 );
192 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
195 if ( ( count( $p ) < 2 ) ||
( '' == $p[1] ) ) {
198 if($tag==STRIP_COMMENTS
) {
199 $q = preg_split( '/-->/i', $p[1], 2 );
201 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
203 $marker = $rnd . sprintf('%08X', $n++
);
204 $content[$marker] = $q[0];
205 $stripped .= $marker;
212 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
213 # If $render is set, performs necessary rendering operations on plugins
214 # Returns the text, and fills an array with data needed in unstrip()
215 # If the $state is already a valid strip state, it adds to the state
217 # When $stripcomments is set, HTML comments <!-- like this -->
218 # will be stripped in addition to other tags. This is important
219 # for section editing, where these comments cause confusion when
220 # counting the sections in the wikisource
221 function strip( $text, &$state, $stripcomments = false ) {
222 $render = ($this->mOutputType
== OT_HTML
);
223 $html_content = array();
224 $nowiki_content = array();
225 $math_content = array();
226 $pre_content = array();
227 $comment_content = array();
228 $ext_content = array();
230 # Replace any instances of the placeholders
231 $uniq_prefix = UNIQ_PREFIX
;
232 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
237 $text = Parser
::extractTags('html', $text, $html_content, $uniq_prefix);
238 foreach( $html_content as $marker => $content ) {
240 # Raw and unchecked for validity.
241 $html_content[$marker] = $content;
243 $html_content[$marker] = "<html>$content</html>";
249 $text = Parser
::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
250 foreach( $nowiki_content as $marker => $content ) {
252 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
254 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
259 $text = Parser
::extractTags('math', $text, $math_content, $uniq_prefix);
260 foreach( $math_content as $marker => $content ){
262 if( $this->mOptions
->getUseTeX() ) {
263 $math_content[$marker] = renderMath( $content );
265 $math_content[$marker] = "<math>$content<math>";
268 $math_content[$marker] = "<math>$content</math>";
273 $text = Parser
::extractTags('pre', $text, $pre_content, $uniq_prefix);
274 foreach( $pre_content as $marker => $content ){
276 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
278 $pre_content[$marker] = "<pre>$content</pre>";
284 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
285 foreach( $comment_content as $marker => $content ){
286 $comment_content[$marker] = "<!--$content-->";
291 foreach ( $this->mTagHooks
as $tag => $callback ) {
292 $ext_contents[$tag] = array();
293 $text = Parser
::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
294 foreach( $ext_content[$tag] as $marker => $content ) {
296 $ext_content[$tag][$marker] = $callback( $content );
298 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
303 # Merge state with the pre-existing state, if there is one
305 $state['html'] = $state['html'] +
$html_content;
306 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
307 $state['math'] = $state['math'] +
$math_content;
308 $state['pre'] = $state['pre'] +
$pre_content;
309 $state['comment'] = $state['comment'] +
$comment_content;
311 foreach( $ext_content as $tag => $array ) {
312 if ( array_key_exists( $tag, $state ) ) {
313 $state[$tag] = $state[$tag] +
$array;
318 'html' => $html_content,
319 'nowiki' => $nowiki_content,
320 'math' => $math_content,
321 'pre' => $pre_content,
322 'comment' => $comment_content,
328 # always call unstripNoWiki() after this one
# Put stripped content back into $text for every group in $state except
# 'nowiki' and 'html', which are left untouched here.
# $state maps a group name to an array of marker => replacement.
# Groups, and the markers inside each group, are expanded in reverse
# order, otherwise nested tags will be corrupted.
# Returns the text with the markers substituted.
function unstrip( $text, &$state ) {
	foreach ( array_reverse( $state, true ) as $group => $markers ) {
		if ( $group == 'nowiki' || $group == 'html' ) {
			continue;
		}
		foreach ( array_reverse( $markers, true ) as $marker => $replacement ) {
			$text = str_replace( $marker, $replacement, $text );
		}
	}
	return $text;
}
342 # always call this after unstrip() to preserve the order
343 function unstripNoWiki( $text, &$state ) {
344 # Must expand in reverse order, otherwise nested tags will be corrupted
345 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
346 $text = str_replace( key( $state['nowiki'] ), $content, $text );
351 for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
352 $text = str_replace( key( $state['html'] ), $content, $text );
359 # Add an item to the strip state
360 # Returns the unique tag which must be inserted into the stripped text
361 # The tag will be replaced with the original text in unstrip()
363 function insertStripItem( $text, &$state ) {
364 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
373 $state['item'][$rnd] = $text;
378 # generate a list of subcategories and pages for a category
379 # depending on wfMsg("usenewcategorypage") it either calls the new
380 # or the old code. The new code will not work properly for some
381 # languages due to sorting issues, so they might want to turn it
# Dispatch to one of the two category-listing implementations based on
# the 'usenewcategorypage' message: when its first character is '0' the
# old code is used, otherwise the new code.
function categoryMagic() {
	$switchMsg = wfMsg( 'usenewcategorypage' );
	# @ keeps an empty message from raising a string-offset notice
	$firstChar = @$switchMsg[0];
	return ( '0' == $firstChar )
		? $this->oldCategoryMagic()
		: $this->newCategoryMagic();
}
393 # This method generates the list of subcategories and pages for a category
394 function oldCategoryMagic () {
396 $fname = 'Parser::oldCategoryMagic';
398 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
400 if ( $this->mTitle
->getNamespace() != NS_CATEGORY
) return "" ; # This ain't a category page
402 $r = "<br style=\"clear:both;\"/>\n";
405 $sk =& $this->mOptions
->getSkin() ;
407 $articles = array() ;
408 $children = array() ;
410 $id = $this->mTitle
->getArticleID() ;
413 $dbr =& wfGetDB( DB_SLAVE
);
414 $cur = $dbr->tableName( 'cur' );
415 $categorylinks = $dbr->tableName( 'categorylinks' );
417 $t = $dbr->strencode( $this->mTitle
->getDBKey() );
418 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
419 "WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
420 $res = $dbr->query( $sql, $fname ) ;
421 while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;
423 # For all pages that link to this category
424 foreach ( $data AS $x )
426 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
427 if ( $t != '' ) $t .= ':' ;
428 $t .= $x->cur_title
;
430 if ( $x->cur_namespace
== NS_CATEGORY
) {
431 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
433 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
436 $dbr->freeResult ( $res ) ;
438 # Showing subcategories
439 if ( count ( $children ) > 0 ) {
440 $r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
441 $r .= implode ( ', ' , $children ) ;
444 # Showing pages in this category
445 if ( count ( $articles ) > 0 ) {
446 $ti = $this->mTitle
->getText() ;
447 $h = wfMsg( 'category_header', $ti );
448 $r .= "<h2>{$h}</h2>\n" ;
449 $r .= implode ( ', ' , $articles ) ;
455 function newCategoryMagic () {
457 if ( !$this->mOptions
->getUseCategoryMagic() ) return ; # Doesn't use categories at all
459 if ( $this->mTitle
->getNamespace() != NS_CATEGORY
) return '' ; # This ain't a category page
461 $r = "<br style=\"clear:both;\"/>\n";
464 $sk =& $this->mOptions
->getSkin() ;
466 $articles = array() ;
467 $articles_start_char = array();
468 $children = array() ;
469 $children_start_char = array();
471 $id = $this->mTitle
->getArticleID() ;
474 $dbr =& wfGetDB( DB_SLAVE
);
475 $cur = $dbr->tableName( 'cur' );
476 $categorylinks = $dbr->tableName( 'categorylinks' );
478 $t = $dbr->strencode( $this->mTitle
->getDBKey() );
479 $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
480 "$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
481 $res = $dbr->query ( $sql ) ;
482 while ( $x = $dbr->fetchObject ( $res ) )
484 $t = $ns = $wgLang->getNsText ( $x->cur_namespace
) ;
485 if ( $t != '' ) $t .= ':' ;
486 $t .= $x->cur_title
;
488 if ( $x->cur_namespace
== NS_CATEGORY
) {
489 $ctitle = str_replace( '_',' ',$x->cur_title
);
490 array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory
492 // If there's a link from Category:A to Category:B, the sortkey of the resulting
493 // entry in the categorylinks table is Category:A, not A, which it SHOULD be.
494 // Workaround: If sortkey == "Category:".$title, then use $title for sorting,
495 // else use sortkey...
496 if ( ($ns.':'.$ctitle) == $x->cl_sortkey
) {
497 array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title
) );
499 array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey
) ) ;
502 array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
503 array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey
) ) ;
506 $dbr->freeResult ( $res ) ;
508 $ti = $this->mTitle
->getText() ;
510 # Don't show subcategories section if there are none.
511 if ( count ( $children ) > 0 )
513 # Showing subcategories
514 $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
516 $numchild = count( $children );
518 $r .= wfMsg( 'subcategorycount1', 1 );
520 $r .= wfMsg( 'subcategorycount' , $numchild );
524 if ( count ( $children ) > 6 ) {
526 // divide list into three equal chunks
527 $chunk = (int) (count ( $children ) / 3);
529 // get and display header
530 $r .= '<table width="100%"><tr valign="top">';
535 // loop through the chunks
536 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
538 $chunkIndex++
, $startChunk = $endChunk, $endChunk +
= $chunk +
1)
542 // output all subcategories to category
543 for ($index = $startChunk ;
544 $index < $endChunk && $index < count($children);
547 // check for change of starting letter or beginning of chunk
548 if ( ($children_start_char[$index] != $children_start_char[$index - 1])
549 ||
($index == $startChunk) )
551 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
554 $r .= "<li>{$children[$index]}</li>";
560 $r .= '</tr></table>';
562 // for short lists of subcategories to category.
564 $r .= "<h3>{$children_start_char[0]}</h3>\n";
565 $r .= '<ul><li>'.$children[0].'</li>';
566 for ($index = 1; $index < count($children); $index++
)
568 if ($children_start_char[$index] != $children_start_char[$index - 1])
570 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
573 $r .= "<li>{$children[$index]}</li>";
577 } # END of if ( count($children) > 0 )
579 $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
581 $numart = count( $articles );
583 $r .= wfMsg( 'categoryarticlecount1', 1 );
585 $r .= wfMsg( 'categoryarticlecount' , $numart );
589 # Showing articles in this category
590 if ( count ( $articles ) > 6) {
591 $ti = $this->mTitle
->getText() ;
593 // divide list into three equal chunks
594 $chunk = (int) (count ( $articles ) / 3);
596 // get and display header
597 $r .= '<table width="100%"><tr valign="top">';
599 // loop through the chunks
600 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
602 $chunkIndex++
, $startChunk = $endChunk, $endChunk +
= $chunk +
1)
607 // output all articles in category
608 for ($index = $startChunk ;
609 $index < $endChunk && $index < count($articles);
612 // check for change of starting letter or beginning of chunk
613 if ( ($articles_start_char[$index] != $articles_start_char[$index - 1])
614 ||
($index == $startChunk) )
616 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
619 $r .= "<li>{$articles[$index]}</li>";
625 $r .= '</tr></table>';
626 } elseif ( count($articles) > 0) {
627 // for short lists of articles in categories.
628 $ti = $this->mTitle
->getText() ;
630 $r .= '<h3>'.$articles_start_char[0]."</h3>\n";
631 $r .= '<ul><li>'.$articles[0].'</li>';
632 for ($index = 1; $index < count($articles); $index++
)
634 if ($articles_start_char[$index] != $articles_start_char[$index - 1])
636 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
639 $r .= "<li>{$articles[$index]}</li>";
648 # Return allowed HTML attributes
# Return allowed HTML attributes--no scripting, etc.
# The result is a flat, zero-indexed list assembled from per-purpose
# groups. 'width' is listed in both the generic and the table group, so
# it occurs twice in the returned list.
function getHTMLattrs () {
	$generic = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );

	$perElement = array(
		'clear',                  # BR
		'noshade',                # HR
		'cite',                   # BLOCKQUOTE, Q
		'size', 'face', 'color'   # FONT
	);

	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );

	# Tables
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);

	# For CSS
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $generic, $perElement, $lists, $tables, $css );
}
665 # Remove non approved attributes and javascript in css
666 function fixTagAttributes ( $t ) {
667 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
668 $htmlattrs = $this->getHTMLattrs() ;
670 # Strip non-approved attributes from the tag
672 '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
673 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
676 $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557
678 # Strip javascript "expression" from stylesheets. Brute force approach:
679 # If anything offensive is found, all attributes of the HTML tag are dropped
682 '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
683 wfMungeToUtf8( $t ) ) )
691 # interface with html tidy, used if $wgUseTidy = true
692 function tidy ( $text ) {
693 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
694 global $wgInputEncoding, $wgOutputEncoding;
695 $fname = 'Parser::tidy';
696 wfProfileIn( $fname );
699 switch(strtoupper($wgOutputEncoding)) {
701 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
704 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
707 $wgTidyOpts .= ' -raw';
710 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
711 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
712 '<head><title>test</title></head><body>'.$text.'</body></html>';
713 $descriptorspec = array(
714 0 => array('pipe', 'r'),
715 1 => array('pipe', 'w'),
716 2 => array('file', '/dev/null', 'a')
718 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
719 if (is_resource($process)) {
720 fwrite($pipes[0], $wrappedtext);
722 while (!feof($pipes[1])) {
723 $cleansource .= fgets($pipes[1], 1024);
726 $return_value = proc_close($process);
729 wfProfileOut( $fname );
731 if( $cleansource == '' && $text != '') {
732 wfDebug( "Tidy error detected!\n" );
733 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
739 # parse the wiki syntax used to render tables
740 function doTableStuff ( $t ) {
741 $fname = 'Parser::doTableStuff';
742 wfProfileIn( $fname );
744 $t = explode ( "\n" , $t ) ;
745 $td = array () ; # Is currently a td tag open?
746 $ltd = array () ; # Was it TD or TH?
747 $tr = array () ; # Is currently a tr tag open?
748 $ltr = array () ; # tr attributes
749 $indent_level = 0; # indent level of the table
750 foreach ( $t AS $k => $x )
753 $fc = substr ( $x , 0 , 1 ) ;
754 if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) )
756 $indent_level = strlen( $matches[1] );
758 str_repeat( "<dl><dd>", $indent_level ) .
759 "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
760 array_push ( $td , false ) ;
761 array_push ( $ltd , '' ) ;
762 array_push ( $tr , false ) ;
763 array_push ( $ltr , '' ) ;
765 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
766 else if ( '|}' == substr ( $x , 0 , 2 ) )
769 $l = array_pop ( $ltd ) ;
770 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
771 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
773 $t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
775 else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
777 $x = substr ( $x , 1 ) ;
778 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
780 $l = array_pop ( $ltd ) ;
781 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
782 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
785 array_push ( $tr , false ) ;
786 array_push ( $td , false ) ;
787 array_push ( $ltd , '' ) ;
788 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
790 else if ( '|' == $fc ||
'!' == $fc ||
'|+' == substr ( $x , 0 , 2 ) ) # Caption
792 if ( '|+' == substr ( $x , 0 , 2 ) )
795 $x = substr ( $x , 1 ) ;
797 $after = substr ( $x , 1 ) ;
798 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
799 $after = explode ( '||' , $after ) ;
801 foreach ( $after AS $theline )
806 $tra = array_pop ( $ltr ) ;
807 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
808 array_push ( $tr , true ) ;
809 array_push ( $ltr , '' ) ;
812 $l = array_pop ( $ltd ) ;
813 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
814 if ( $fc == '|' ) $l = 'td' ;
815 else if ( $fc == '!' ) $l = 'th' ;
816 else if ( $fc == '+' ) $l = 'caption' ;
818 array_push ( $ltd , $l ) ;
819 $y = explode ( '|' , $theline , 2 ) ;
820 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
821 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
823 array_push ( $td , true ) ;
828 # Closing open td, tr && table
829 while ( count ( $td ) > 0 )
831 if ( array_pop ( $td ) ) $t[] = '</td>' ;
832 if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
836 $t = implode ( "\n" , $t ) ;
837 # $t = $this->removeHTMLtags( $t );
838 wfProfileOut( $fname );
842 # Parses the text and adds the result to the strip state
843 # Returns the strip tag
# Run $text through strip() and internalParse() (not as the main text),
# store the result in the strip state via insertStripItem(), and return
# $newline followed by the value insertStripItem() returned.
# $newline is also cast to bool and passed on as the line-start flag.
function stripParse( $text, $newline, $args ) {
	$stripped = $this->strip( $text, $this->mStripState );
	$atLineStart = (bool)$newline;
	$rendered = $this->internalParse( $stripped, $atLineStart, $args, false );
	$marker = $this->insertStripItem( $rendered, $this->mStripState );
	return $newline . $marker;
}
850 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
851 $fname = 'Parser::internalParse';
852 wfProfileIn( $fname );
854 $text = $this->removeHTMLtags( $text );
855 $text = $this->replaceVariables( $text, $args );
857 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
859 $text = $this->doHeadings( $text );
860 if($this->mOptions
->getUseDynamicDates()) {
861 global $wgDateFormatter;
862 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
864 $text = $this->doAllQuotes( $text );
865 $text = $this->replaceExternalLinks( $text );
866 $text = $this->doMagicLinks( $text );
867 $text = $this->replaceInternalLinks ( $text );
868 $text = $this->replaceInternalLinks ( $text );
870 $text = $this->unstrip( $text, $this->mStripState
);
871 $text = $this->unstripNoWiki( $text, $this->mStripState
);
873 $text = $this->doTableStuff( $text );
874 $text = $this->formatHeadings( $text, $isMain );
875 $sk =& $this->mOptions
->getSkin();
876 $text = $sk->transformContent( $text );
878 if ( $isMain && !isset ( $this->categoryMagicDone
) ) {
879 $text .= $this->categoryMagic () ;
880 $this->categoryMagicDone
= true ;
883 wfProfileOut( $fname );
# Apply the "magic link" passes to $text in this order: magicISBN(),
# magicGEO() (only when the global $wgUseGeoMode is set and true), then
# magicRFC(). $text is taken by reference, so the caller's variable is
# updated as well as being returned (by reference).
/* private */ function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );

	$geoEnabled = !empty( $wgUseGeoMode );
	if ( $geoEnabled ) {
		$text = $this->magicGEO( $text );
	}

	$text = $this->magicRFC( $text );
	return $text;
}
897 # Parse ^^ tokens and return html
# Parse ^^ tokens and return html: text written as ^^...^^ is wrapped in
# <small><sup>...</sup></small>. Matching never crosses a newline.
/* private */ function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );
	# Non-greedy match, so that several ^^...^^ pairs on the same line are
	# converted one by one instead of being merged into one superscript.
	$text = preg_replace( '/\^\^(.*?)\^\^/', '<small><sup>\\1</sup></small>', $text );
	wfProfileOut( $fname );
	return $text;
}
906 # Parse headers and return html
# Parse headers and return html. A line that starts with N '=' characters
# and contains a closing run of N '=' followed by whitespace or the end
# of the line becomes <hN>...</hN>. Levels are tried from 6 down to 1 so
# that longer runs are consumed first.
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	foreach ( array( 6, 5, 4, 3, 2, 1 ) as $i ) {
		$h = substr( '======', 0, $i );
		$pattern = "/^{$h}(.+){$h}(\\s|$)/m";
		$replacement = "<h{$i}>\\1</h{$i}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
# Apply doQuotes() to every line of $text separately and join the
# results back together with "\n" (no trailing newline is added).
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
932 /* private */ function doQuotes( $text ) {
933 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
934 if (count ($arr) == 1)
938 # First, do some preliminary work. This may shift some apostrophes from
939 # being mark-up to being text. It also counts the number of occurrences
940 # of bold and italics mark-ups.
948 # If there are ever four apostrophes, assume the first is supposed to
949 # be text, and the remaining three constitute mark-up for bold text.
950 if (strlen ($arr[$i]) == 4)
955 # If there are more than 5 apostrophes in a row, assume they're all
956 # text except for the last 5.
957 else if (strlen ($arr[$i]) > 5)
959 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
962 # Count the number of occurrences of bold and italics mark-ups.
963 # We are not counting sequences of five apostrophes.
964 if (strlen ($arr[$i]) == 2) $numitalics++
; else
965 if (strlen ($arr[$i]) == 3) $numbold++
; else
966 if (strlen ($arr[$i]) == 5) { $numitalics++
; $numbold++
; }
971 # If there is an odd number of both bold and italics, it is likely
972 # that one of the bold ones was meant to be an apostrophe followed
973 # by italics. Which one we cannot know for certain, but it is more
974 # likely to be one that has a single-letter word before it.
975 if (($numbold %
2 == 1) && ($numitalics %
2 == 1))
978 $firstsingleletterword = -1;
979 $firstmultiletterword = -1;
983 if (($i %
2 == 1) and (strlen ($r) == 3))
985 $x1 = substr ($arr[$i-1], -1);
986 $x2 = substr ($arr[$i-1], -2, 1);
988 if ($firstspace == -1) $firstspace = $i;
989 } else if ($x2 == " ") {
990 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
992 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
998 # If there is a single-letter word, use it!
999 if ($firstsingleletterword > -1)
1001 $arr [ $firstsingleletterword ] = "''";
1002 $arr [ $firstsingleletterword-1 ] .= "'";
1004 # If not, but there's a multi-letter word, use that one.
1005 else if ($firstmultiletterword > -1)
1007 $arr [ $firstmultiletterword ] = "''";
1008 $arr [ $firstmultiletterword-1 ] .= "'";
1010 # ... otherwise use the first one that has neither.
1011 # (notice that it is possible for all three to be -1 if, for example,
1012 # there is only one pentuple-apostrophe in the line)
1013 else if ($firstspace > -1)
1015 $arr [ $firstspace ] = "''";
1016 $arr [ $firstspace-1 ] .= "'";
1020 # Now let's actually convert our apostrophic mush to HTML!
1025 foreach ($arr as $r)
1029 if ($state == 'both')
1036 if (strlen ($r) == 2)
1039 { $output .= "</em>"; $state = ''; }
1040 else if ($state == 'strongem')
1041 { $output .= "</em>"; $state = 'strong'; }
1042 else if ($state == 'emstrong')
1043 { $output .= "</strong></em><strong>"; $state = 'strong'; }
1044 else if ($state == 'both')
1045 { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
1046 else # $state can be 'strong' or ''
1047 { $output .= "<em>"; $state .= 'em'; }
1049 else if (strlen ($r) == 3)
1051 if ($state == 'strong')
1052 { $output .= "</strong>"; $state = ''; }
1053 else if ($state == 'strongem')
1054 { $output .= "</em></strong><em>"; $state = 'em'; }
1055 else if ($state == 'emstrong')
1056 { $output .= "</strong>"; $state = 'em'; }
1057 else if ($state == 'both')
1058 { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
1059 else # $state can be 'em' or ''
1060 { $output .= "<strong>"; $state .= 'strong'; }
1062 else if (strlen ($r) == 5)
1064 if ($state == 'strong')
1065 { $output .= "</strong><em>"; $state = 'em'; }
1066 else if ($state == 'em')
1067 { $output .= "</em><strong>"; $state = 'strong'; }
1068 else if ($state == 'strongem')
1069 { $output .= "</em></strong>"; $state = ''; }
1070 else if ($state == 'emstrong')
1071 { $output .= "</strong></em>"; $state = ''; }
1072 else if ($state == 'both')
1073 { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
1074 else # ($state == '')
1075 { $buffer = ''; $state = 'both'; }
1080 # Now close all remaining tags. Notice that the order is important.
1081 if ($state == 'strong' ||
$state == 'emstrong')
1082 $output .= '</strong>';
1083 if ($state == 'em' ||
$state == 'strongem' ||
$state == 'emstrong')
1085 if ($state == 'strongem')
1086 $output .= '</strong>';
1087 if ($state == 'both')
1088 $output .= "<strong><em>{$buffer}</em></strong>";
1093 # Note: we have to do external links before the internal ones,
1094 # and otherwise take great care in the order of things here, so
1095 # that we don't end up interpreting some URLs twice.
#
# replaceExternalLinks(): turns bracketed external links ([url] or [url text])
# found in $text into <a> elements. The text before the first bracketed link
# and the trail after each link are passed through replaceFreeExternalLinks()
# so that bare URLs are linked as well.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1105 -> 1108),
# so some statements of this function (initialisers, else branches, closing
# braces, the return) are not visible in this listing. The comments below
# describe only what is visible.
1097 /* private */ function replaceExternalLinks( $text ) {
1098 $fname = 'Parser::replaceExternalLinks';
1099 wfProfileIn( $fname );
1101 $sk =& $this->mOptions
->getSkin();
1102 $linktrail = wfMsg('linktrail');
# PREG_SPLIT_DELIM_CAPTURE makes preg_split() return the captured groups of
# EXT_LINK_BRACKETED interleaved with the text between links.
1103 $bits = preg_split( EXT_LINK_BRACKETED
, $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
# The first element is the text before any bracketed link.
1105 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
# Each iteration consumes consecutive entries of $bits for one link.
1108 while ( $i<count( $bits ) ) {
1110 $protocol = $bits[$i++
];
1111 $text = $bits[$i++
];
1112 $trail = $bits[$i++
];
1114 # If the link text is an image URL, replace it with an <img> tag
1115 # This happened by accident in the original parser, but some people used it extensively
1116 $img = $this->maybeMakeImageLink( $text );
1117 if ( $img !== false ) {
1123 # No link text, e.g. [http://domain.tld/some.link]
1124 if ( $text == '' ) {
1125 # Autonumber if allowed
# strpos() here is a substring test of $protocol inside the HTTP_PROTOCOLS
# string ('http|https' per the define at the top of the file).
1126 if ( strpos( HTTP_PROTOCOLS
, $protocol ) !== false ) {
1127 $text = "[" . ++
$this->mAutonumber
. "]";
1129 # Otherwise just use the URL
1130 $text = htmlspecialchars( $url );
1133 # Have link text, e.g. [http://domain.tld/some.link text]s
1135 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1141 $encUrl = htmlspecialchars( $url );
1142 # Bit in parentheses showing the URL for the printable version
# NOTE(review): preg_quote() is given "/" as the delimiter, but the pattern
# is delimited with "!", so a "!" inside $text would not be escaped.
1143 if( $url == $text ||
preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $url ) ) {
1146 # Expand the URL for printable version
1147 if ( ! $sk->suppressUrlExpansion() ) {
# NOTE(review): $encUrl is already the htmlspecialchars() of $url, so it is
# escaped a second time here - confirm this double escaping is intended.
1148 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1154 # Process the trail (i.e. everything after this link up until start of the next link),
1155 # replacing any non-bracketed links
1156 $trail = $this->replaceFreeExternalLinks( $trail );
1158 $la = $sk->getExternalLinkAttributes( $url, $text );
1160 # Use the encoded URL
1161 # This means that users can paste URLs directly into the text
1162 # Funny characters like ö aren't valid in URLs anyway
1163 # This was changed in August 2004
# NOTE(review): the href below interpolates $url, not $encUrl, although the
# comment above speaks of the encoded URL - confirm which one is intended.
1164 $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
1167 wfProfileOut( $fname );
1171 # Replace anything that looks like a URL with a link
#
# The text is split on "<protocol>:" for every protocol listed in
# URL_PROTOCOLS; each protocol is then re-joined with the run of
# EXT_LINK_URL_CLASS characters that follows it to form a candidate URL.
# Trailing separator characters are moved from the URL into the trail, and the
# URL becomes either an external image (maybeMakeImageLink) or a link
# produced by the skin's makeExternalLink().
#
# NOTE(review): the embedded original line numbers jump (e.g. 1185 -> 1188),
# so some statements (among them the initial values of $i, $trail and $sep,
# the else line and the return) are not visible in this listing.
1172 function replaceFreeExternalLinks( $text ) {
1173 $bits = preg_split( '/((?:'.URL_PROTOCOLS
.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
# Text before the first protocol match is kept unchanged.
1174 $s = array_shift( $bits );
1177 $sk =& $this->mOptions
->getSkin();
1179 while ( $i < count( $bits ) ){
1180 $protocol = $bits[$i++
];
1181 $remainder = $bits[$i++
];
1183 if ( preg_match( '/^('.EXT_LINK_URL_CLASS
.'+)(.*)$/s', $remainder, $m ) ) {
1184 # Found some characters after the protocol that look promising
1185 $url = $protocol . $m[1];
1188 # Move trailing punctuation to $trail
1190 # If there is no left bracket, then consider right brackets fair game too
1191 if ( strpos( $url, '(' ) === false ) {
# Count how many characters at the END of $url belong to $sep by running
# strspn() over the reversed string.
1195 $numSepChars = strspn( strrev( $url ), $sep );
1196 if ( $numSepChars ) {
1197 $trail = substr( $url, -$numSepChars ) . $trail;
1198 $url = substr( $url, 0, -$numSepChars );
1201 # Replace & from obsolete syntax with &
# NOTE(review): as written this replaces '&' with '&', which is a no-op;
# presumably the search string was an HTML entity for the ampersand that
# was decoded somewhere along the way - confirm against the upstream file.
1202 $url = str_replace( '&', '&', $url );
1204 # Is this an external image?
1205 $text = $this->maybeMakeImageLink( $url );
1206 if ( $text === false ) {
1207 # Not an image, make a link
1208 $text = $sk->makeExternalLink( $url, $url );
1210 $s .= $text . $trail;
# No URL-like characters followed the protocol: emit the pieces unchanged.
1212 $s .= $protocol . $remainder;
# Make an image out of $url if external images are allowed and the URL
# matches EXT_IMAGE_REGEX.
#
# @param string $url Candidate URL.
# @return mixed Whatever the skin's makeImage() produces for the escaped
#               URL, or boolean false when no image was made. Callers compare
#               the result with === false.
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	# Default: not an image.
	$text = false;
	if ( $this->mOptions->getAllowExternalImages() ) {
		if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
			# Image found; the URL is HTML-escaped before being handed to the skin.
			$text = $sk->makeImage( htmlspecialchars( $url ) );
		}
	}
	return $text;
}
# Replaces [[...]] internal links in $s.
#
# The text is split on '[[' and every following fragment is matched against
# $e1 (link target, optional "|text", then "]]" and the trail). The resolved
# Title decides what is emitted: a language link, an image, a category link,
# a self link, a media link, a special-page link or an ordinary link built by
# the skin. Fragments that do not match, or whose Title cannot be created, are
# written back with their leading '[['.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1277 -> 1285),
# so many statements (static declarations, else branches, continue/return
# statements, closing braces) are not visible in this listing.
1231 /* private */ function replaceInternalLinks( $s ) {
1232 global $wgLang, $wgLinkCache;
1233 global $wgNamespacesWithSubpages, $wgLanguageCode;
1234 static $fname = 'Parser::replaceInternalLinks' ;
1235 wfProfileIn( $fname );
1237 wfProfileIn( $fname.'-setup' );
1239 # the % is needed to support urlencoded titles as well
1240 if ( !$tc ) { $tc = Title
::legalChars() . '#%'; }
1241 $sk =& $this->mOptions
->getSkin();
1243 $redirect = MagicWord
::get ( MAG_REDIRECT
) ;
# A space is prepended before splitting and removed again from the first
# fragment two lines further down.
1245 $a = explode( '[[', ' ' . $s );
1246 $s = array_shift( $a );
1247 $s = substr( $s, 1 );
1249 # Match a link having the form [[namespace:link|alternate]]trail
1251 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1252 # Match the end of a line for a word that's not followed by whitespace,
1253 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1254 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1256 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1257 # Special and Media are pseudo-namespaces; no pages actually exist in them
1259 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
1261 if ( $useLinkPrefixExtension ) {
1262 if ( preg_match( $e2, $s, $m ) ) {
1263 $first_prefix = $m[2];
1266 $first_prefix = false;
1272 wfProfileOut( $fname.'-setup' );
# Main loop: one iteration per fragment that followed a '[['.
1274 foreach ( $a as $line ) {
1275 wfProfileIn( $fname.'-prefixhandling' );
1276 if ( $useLinkPrefixExtension ) {
1277 if ( preg_match( $e2, $s, $m ) ) {
1285 $prefix = $first_prefix;
1286 $first_prefix = false;
1289 wfProfileOut( $fname.'-prefixhandling' );
1291 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1293 # fix up urlencoded title texts
1294 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1296 } else { # Invalid form; output directly
1297 $s .= $prefix . '[[' . $line ;
1301 /* Valid link forms:
1303 :Foobar -- override special treatment of prefix (images, language links)
1304 /Foobar -- convert to CurrentPage/Foobar
1305 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
# $c is the first character of the target; a leading ':' clears $noforce.
1307 $c = substr($m[1],0,1);
1308 $noforce = ($c != ':');
1309 if( $c == '/' ) { # subpage
1310 if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
1311 $m[1]=substr($m[1],1,strlen($m[1])-2);
1314 $noslash=substr($m[1],1);
1316 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) { # subpages allowed here
1317 $link = $this->mTitle
->getPrefixedText(). '/' . trim($noslash);
1320 } # this might be changed for ugliness reasons
1322 $link = $noslash; # no subpage allowed, use standard link
1324 } elseif( $noforce ) { # no subpage
1327 $link = substr( $m[1], 1 );
# Remember whether the link had no explicit text; used below as the switch
# for the category sort key.
1329 $wasblank = ( '' == $text );
1333 $nt = Title
::newFromText( $link );
1335 $s .= $prefix . '[[' . $line;
1338 $ns = $nt->getNamespace();
1339 $iw = $nt->getInterWiki();
# Interlanguage link: recorded in the parser output instead of being rendered
# inline; the surrounding prefix/trail is kept unless it is only whitespace.
1341 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1342 array_push( $this->mOutput
->mLanguageLinks
, $nt->getFullText() );
1343 $tmp = $prefix . $trail ;
1344 $s .= (trim($tmp) == '')?
'': $tmp;
1347 if ( $ns == NS_IMAGE
) {
1348 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1349 $wgLinkCache->addImageLinkObj( $nt );
1352 if ( $ns == NS_CATEGORY
) {
1353 $t = $nt->getText() ;
1354 $nnt = Title
::newFromText ( Namespace::getCanonicalName(NS_CATEGORY
).":".$t ) ;
# The link cache is suspended and the skin's post-parse link colouring is
# switched off while the category link is built; both are restored afterwards.
1356 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1357 $pPLC=$sk->postParseLinkColour();
1358 $sk->postParseLinkColour( false );
1359 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1360 $sk->postParseLinkColour( $pPLC );
1361 $wgLinkCache->resume();
# Sort key: the current page's prefixed title when no text was given,
# otherwise the text after the pipe.
1363 $sortkey = $wasblank ?
$this->mTitle
->getPrefixedText() : $text;
1364 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1365 $this->mOutput
->mCategoryLinks
[] = $t ;
1366 $s .= $prefix . $trail ;
# NOTE(review): strpos() is compared with == FALSE, which is also true when
# '#' is found at offset 0 - confirm that is acceptable here.
1370 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
1371 ( strpos( $link, '#' ) == FALSE ) ) {
1372 # Self-links are handled specially; generally de-link and change to bold.
1373 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1377 if( $ns == NS_MEDIA
) {
1378 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1379 $wgLinkCache->addImageLinkObj( $nt );
1381 } elseif( $ns == NS_SPECIAL
) {
1382 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
# Ordinary link: the prefix is handed to the skin instead of being prepended.
1385 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1387 wfProfileOut( $fname );
1391 # Some functions here used by doBlockLevels()
# Close the block element recorded in $this->mLastSection, if any.
#
# Resets $this->mLastSection to '' and $this->mInPre to false in every case.
#
# @return string The closing tag followed by a newline, or '' when no
#                section was open.
/* private */ function closeParagraph() {
	$result = '';
	if ( '' != $this->mLastSection ) {
		$result = '</' . $this->mLastSection . ">\n";
	}
	$this->mInPre = false;
	$this->mLastSection = '';
	return $result;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes in which they agree.
#
# @param string $st1
# @param string $st2
# @return int 0 when either string is empty or the first bytes differ.
/* private */ function getCommon( $st1, $st2 ) {
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square-bracket offsets are used instead of the {} form, which newer
	# PHP versions no longer accept.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
# openList(), nextItem() and closeList() open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() first closes any open paragraph, then opens a new list together
# with its first item. Opening a ';' list sets $this->mDTopen.
#
# @param string $char One of '*', '#', ':' or ';'.
# @return string HTML to append. For an unknown character only the marker
#                '<!-- ERR 1 -->' is returned (the paragraph-closing markup
#                is discarded in that case).
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
# Continue the current list with a new item.
#
# For '*' and '#' the current <li> is closed and a new one opened. For ':'
# and ';' the currently open definition element is closed (</dt> when
# $this->mDTopen is set, </dd> otherwise) and a <dd> or <dt> is opened;
# $this->mDTopen is updated to match the element just opened.
#
# @param string $char One of '*', '#', ':' or ';'.
# @return string HTML to append, or '<!-- ERR 2 -->' for an unknown character.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}

	if ( ':' == $char || ';' == $char ) {
		# Pick the closing tag for whatever definition element is open now.
		$close = '</dd>';
		if ( $this->mDTopen ) { $close = '</dt>'; }

		if ( ';' == $char ) {
			$this->mDTopen = true;
			return $close . '<dt>';
		} else {
			$this->mDTopen = false;
			return $close . '<dd>';
		}
	}

	return '<!-- ERR 2 -->';
}
# Close the list element belonging to the given prefix character.
#
# For ':' the open definition element decides the markup: </dt></dl> when
# $this->mDTopen is set (the flag is cleared), </dd></dl> otherwise.
#
# @param string $char One of '*', '#' or ':'.
# @return string Closing markup followed by a newline, or '<!-- ERR 3 -->'
#                (without newline) for any other character.
/* private */function closeList( $char ) {
	if ( '*' == $char ) { $text = '</li></ul>'; }
	else if ( '#' == $char ) { $text = '</li></ol>'; }
	else if ( ':' == $char ) {
		if ( $this->mDTopen ) {
			$this->mDTopen = false;
			$text = '</dt></dl>';
		} else {
			$text = '</dd></dl>';
		}
	}
	else { return '<!-- ERR 3 -->'; }

	return $text . "\n";
}
# Line-by-line pass that produces block-level markup: list nesting for lines
# starting with * # : ; and <p>/<pre> handling for everything else.
#
# $linestart: when false, the first line of $text is appended to the output
# unprocessed before the per-line loop starts.
#
# State kept across lines: $lastPrefix (list prefix of the previous line),
# $this->mLastSection ('p', 'pre' or ''), $this->mInPre, $this->mDTopen,
# $inBlockElem and $paragraphStack (pending paragraph markup, or false).
#
# NOTE(review): the embedded original line numbers jump (e.g. 1498 -> 1503),
# so several statements (else lines, closing braces, the code that appends
# the line text, the return) are not visible in this listing.
1464 /* private */ function doBlockLevels( $text, $linestart ) {
1465 $fname = 'Parser::doBlockLevels';
1466 wfProfileIn( $fname );
1468 # Parsing through the text line by line. The main thing
1469 # happening here is handling of block-level elements p, pre,
1470 # and making lists from lines starting with * # : etc.
1472 $textLines = explode( "\n", $text );
1474 $lastPrefix = $output = $lastLine = '';
1475 $this->mDTopen
= $inBlockElem = false;
1477 $paragraphStack = false;
1479 if ( !$linestart ) {
1480 $output .= array_shift( $textLines );
1482 foreach ( $textLines as $oLine ) {
1483 $lastPrefixLength = strlen( $lastPrefix );
1484 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1485 $preOpenMatch = preg_match('/<pre/i', $oLine );
1486 if ( !$this->mInPre
) {
1487 # Multiple prefixes may abut each other for nested lists.
1488 $prefixLength = strspn( $oLine, '*#:;' );
1489 $pref = substr( $oLine, 0, $prefixLength );
# $pref2 is the prefix with ';' rewritten to ':'; it is what gets compared
# with, and stored as, $lastPrefix.
1492 $pref2 = str_replace( ';', ':', $pref );
1493 $t = substr( $oLine, $prefixLength );
1494 $this->mInPre
= !empty($preOpenMatch);
1496 # Don't interpret any other prefixes in preformatted text
1498 $pref = $pref2 = '';
1503 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1504 # Same as the last item, so no need to deal with nesting or opening stuff
1505 $output .= $this->nextItem( substr( $pref, -1 ) );
1506 $paragraphStack = false;
1508 if ( substr( $pref, -1 ) == ';') {
1509 # The one nasty exception: definition lists work like this:
1510 # ; title : definition text
1511 # So we check for : in the remainder text to split up the
1512 # title and definition, without b0rking links.
1513 # FIXME: This is not foolproof. Something better in Tokenizer might help.
# NOTE(review): the second alternative in (?:\s| ) shows up as a bare
# space-like character; presumably it was an HTML entity upstream - confirm.
1514 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1516 $output .= $term . $this->nextItem( ':' );
1520 } elseif( $prefixLength ||
$lastPrefixLength ) {
1521 # Either open or close a level...
1522 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1523 $paragraphStack = false;
# Close list levels of the previous line that are not shared with this one,
# innermost first.
1525 while( $commonPrefixLength < $lastPrefixLength ) {
1526 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1527 --$lastPrefixLength;
1529 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1530 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open the levels that this line has beyond the shared prefix.
1532 while ( $prefixLength > $commonPrefixLength ) {
1533 $char = substr( $pref, $commonPrefixLength, 1 );
1534 $output .= $this->openList( $char );
1536 if ( ';' == $char ) {
1537 # FIXME: This is dupe of code above
1538 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1540 $output .= $term . $this->nextItem( ":" );
1544 ++
$commonPrefixLength;
1546 $lastPrefix = $pref2;
1548 if( 0 == $prefixLength ) {
1549 # No prefix (not in list)--go to paragraph mode
1550 $uniq_prefix = UNIQ_PREFIX
;
1551 // XXX: use a stack for nestable elements like span, table and div
1552 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1553 $closematch = preg_match(
1554 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1555 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
# A line containing block-level markup ends the current paragraph and
# decides whether we are inside a block element afterwards.
1556 if ( $openmatch or $closematch ) {
1557 $paragraphStack = false;
1558 $output .= $this->closeParagraph();
1559 if($preOpenMatch and !$preCloseMatch) {
1560 $this->mInPre
= true;
1562 if ( $closematch ) {
1563 $inBlockElem = false;
1565 $inBlockElem = true;
1567 } else if ( !$inBlockElem && !$this->mInPre
) {
# NOTE(review): $t{0} is read without checking that $t is non-empty.
1568 if ( " " == $t{0} and ( $this->mLastSection
== 'pre' or trim($t) != '' ) ) {
1570 if ($this->mLastSection
!= 'pre') {
1571 $paragraphStack = false;
1572 $output .= $this->closeParagraph().'<pre>';
1573 $this->mLastSection
= 'pre';
# Blank line: either flush pending paragraph markup with a <br />, or queue
# '<p>' / '</p><p>' in $paragraphStack for the next non-blank line.
1577 if ( '' == trim($t) ) {
1578 if ( $paragraphStack ) {
1579 $output .= $paragraphStack.'<br />';
1580 $paragraphStack = false;
1581 $this->mLastSection
= 'p';
1583 if ($this->mLastSection
!= 'p' ) {
1584 $output .= $this->closeParagraph();
1585 $this->mLastSection
= '';
1586 $paragraphStack = '<p>';
1588 $paragraphStack = '</p><p>';
# Non-blank line: emit queued paragraph markup, or open a new <p> if none
# is open.
1592 if ( $paragraphStack ) {
1593 $output .= $paragraphStack;
1594 $paragraphStack = false;
1595 $this->mLastSection
= 'p';
1596 } else if ($this->mLastSection
!= 'p') {
1597 $output .= $this->closeParagraph().'<p>';
1598 $this->mLastSection
= 'p';
1604 if ($paragraphStack === false) {
# After the last line: close any list levels that are still open, then any
# open section.
1608 while ( $prefixLength ) {
1609 $output .= $this->closeList( $pref2{$prefixLength-1} );
1612 if ( '' != $this->mLastSection
) {
1613 $output .= '</' . $this->mLastSection
. '>';
1614 $this->mLastSection
= '';
1617 wfProfileOut( $fname );
1621 # Return value of a magic variable (like PAGENAME)
#
# $index is one of the MAG_* identifiers. Date and time values are taken
# from PHP's date() / wfTimestampNow() at call time and localised through
# $wgLang; page-related values come from $this->mTitle.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1623 -> 1626),
# so the switch header and several case labels (those belonging to the
# returns at 1635, 1637 and 1640) are not visible in this listing.
1622 function getVariableValue( $index ) {
1623 global $wgLang, $wgSitename, $wgServer;
1626 case MAG_CURRENTMONTH
:
1627 return $wgLang->formatNum( date( 'm' ) );
1628 case MAG_CURRENTMONTHNAME
:
1629 return $wgLang->getMonthName( date('n') );
1630 case MAG_CURRENTMONTHNAMEGEN
:
1631 return $wgLang->getMonthNameGen( date('n') );
1632 case MAG_CURRENTDAY
:
1633 return $wgLang->formatNum( date('j') );
1635 return $this->mTitle
->getText();
1637 return $this->mTitle
->getPartialURL();
1639 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1640 return $wgLang->getNsText($this->mTitle
->getNamespace()); # Patch by Dori
1641 case MAG_CURRENTDAYNAME
:
# date('w') is 0 for Sunday, so the value passed on is in the range 1..7.
1642 return $wgLang->getWeekdayName( date('w')+
1 );
1643 case MAG_CURRENTYEAR
:
1644 return $wgLang->formatNum( date( 'Y' ) );
1645 case MAG_CURRENTTIME
:
1646 return $wgLang->time( wfTimestampNow(), false );
1647 case MAG_NUMBEROFARTICLES
:
1648 return $wgLang->formatNum( wfNumberOfArticles() );
# Initialise the magic variables (like CURRENTMONTHNAME).
#
# Rebuilds $this->mVariables from scratch: for every id in $wgVariableIDs the
# matching MagicWord registers its spellings in the array together with the
# value computed by getVariableValue().
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $id ) {
		$mw =& MagicWord::get( $id );
		$mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
	}
}
# Expands {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# $args holds the template arguments that apply to $text; it is pushed on
# $this->mArgStack for the duration of the call, because the substitution
# callbacks may call back into this function recursively.
#
# The three passes run in a fixed order: variable substitution, argument
# substitution, template substitution. Each uses preg_replace_callback() with
# a global wf*Substitution function; $GLOBALS['wgCurParser'] is pointed at
# this parser beforehand.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1672 -> 1675),
# so the body of the size check, some closing braces and the return are not
# visible; in particular it cannot be told from this listing which of the
# passes sit inside the OT_HTML condition.
1668 /* private */ function replaceVariables( $text, $args = array() ) {
1669 global $wgLang, $wgScript, $wgArticlePath;
1671 # Prevent too big inclusions
1672 if(strlen($text)> MAX_INCLUDE_SIZE
)
1675 $fname = 'Parser::replaceVariables';
1676 wfProfileIn( $fname );
# $nonBraceChars is the set of legal title characters with '{' and '}'
# removed.
1679 $titleChars = Title
::legalChars();
1680 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1682 # This function is called recursively. To keep track of arguments we need a stack:
1683 array_push( $this->mArgStack
, $args );
1685 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1686 $GLOBALS['wgCurParser'] =& $this;
1689 if ( $this->mOutputType
== OT_HTML
) {
1690 # Variable substitution
1691 $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1693 # Argument substitution
1694 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1696 # Template substitution
# Groups: 1 = optional preceding newline, 2 = template name, 3 = the
# argument string starting with '|' (or empty).
1697 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1698 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1700 array_pop( $this->mArgStack
);
1702 wfProfileOut( $fname );
# Callback for {{VARIABLE}} matches.
#
# @param array $matches preg match: [0] is the whole "{{...}}" text, [1] the
#                       text between the braces.
# @return string The variable's value when [1] is a key of $this->mVariables
#                (the output is then flagged as containing old magic),
#                otherwise the matched text unchanged.
function variableSubstitution( $matches ) {
	# The variable table is built lazily on first use.
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	if ( array_key_exists( $matches[1], $this->mVariables ) ) {
		$text = $this->mVariables[$matches[1]];
		$this->mOutput->mContainsOldMagic = true;
	} else {
		$text = $matches[0];
	}
	return $text;
}
# Split template arguments.
#
# @param string $argsString Either '' or a string starting with '|', e.g.
#                           '|first|second'.
# @return array List of argument strings (empty array for '').
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	# Drop the leading '|' and split on the remaining ones.
	$args = explode( '|', substr( $argsString, 1 ) );

	# If any of the arguments contains a '[[' but no ']]', it needs to be
	# merged with the next arg because the '|' character between belongs
	# to the link syntax and not the template parameter syntax.
	$argc = count( $args );
	for ( $i = 0; $i < $argc - 1; $i++ ) {
		if ( substr_count( $args[$i], "[[" ) != substr_count( $args[$i], "]]" ) ) {
			$args[$i] .= "|" . $args[$i + 1];
			array_splice( $args, $i + 1, 1 );
			# The array is now one element shorter; re-examine the merged
			# argument, which may still be unbalanced.
			$i--;
			$argc--;
		}
	}

	return $args;
}
# Callback for {{...}} matches produced by the template regex in
# replaceVariables(). $matches[1] is an optional preceding newline,
# $matches[2] the part before the first '|', $matches[3] the raw argument
# string.
#
# The handlers below are tried in the order in which they appear: SUBST,
# MSGNW / MSG / INT, NS, LOCALURL / LOCALURLE, internal variables, the
# per-parser template cache and finally loading the template text through
# Article. For HTML output the resulting text is then either escaped or
# parsed recursively with the template arguments.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1746 -> 1753),
# so many statements are not visible in this listing - among them the
# initialisation and updates of $found, $nowiki, $noparse, $func and $title,
# the guards around each handler, else branches, closing braces and the
# return.
1744 function braceSubstitution( $matches ) {
1745 global $wgLinkCache, $wgLang;
1746 $fname = 'Parser::braceSubstitution';
1753 # $newline is an optional newline character before the braces
1754 # $part1 is the bit before the first |, and must contain only title characters
1755 # $args is a list of arguments, starting from index 0, not including $part1
1757 $newline = $matches[1];
1758 $part1 = $matches[2];
1759 # If the third subpattern matched anything, it will start with |
1761 $args = $this->getTemplateArgs($matches[3]);
1762 $argc = count( $args );
# A match that still contains a triple brace is passed through unchanged.
1765 if ( strpos( $matches[0], '{{{' ) !== false ) {
1766 $text = $matches[0];
1773 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1774 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1775 if ( $this->mOutputType
!= OT_WIKI
) {
1776 # Invalid SUBST not replaced at PST time
1777 # Return without further processing
1778 $text = $matches[0];
1782 } elseif ( $this->mOutputType
== OT_WIKI
) {
1783 # SUBST not found in PST pass, do nothing
1784 $text = $matches[0];
1789 # MSG, MSGNW and INT
1792 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1793 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1796 # Remove obsolete MSG:
1797 $mwMsg =& MagicWord
::get( MAG_MSG
);
1798 $mwMsg->matchStartAndRemove( $part1 );
1801 # Check if it is an internal message
1802 $mwInt =& MagicWord
::get( MAG_INT
);
1803 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
# Internal messages count against the inclusion limit under an 'int:' key.
1804 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1805 $text = wfMsgReal( $part1, $args, true );
1813 # Check for NS: (namespace expansion)
1814 $mwNs = MagicWord
::get( MAG_NS
);
1815 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# NOTE(review): intval() is used as the condition, so an argument of "0"
# does not take the numeric branch.
1816 if ( intval( $part1 ) ) {
1817 $text = $wgLang->getNsText( intval( $part1 ) );
1820 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1821 if ( !is_null( $index ) ) {
1822 $text = $wgLang->getNsText( $index );
1829 # LOCALURL and LOCALURLE
1831 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1832 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
# $func names the Title method that is called dynamically further down.
1834 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1835 $func = 'getLocalURL';
1836 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1837 $func = 'escapeLocalURL';
1842 if ( $func !== '' ) {
1843 $title = Title
::newFromText( $part1 );
1844 if ( !is_null( $title ) ) {
1846 $text = $title->$func( $args[0] );
1848 $text = $title->$func();
1855 # Internal variables
1856 if ( !$this->mVariables
) {
1857 $this->initialiseVariables();
1859 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1860 $text = $this->mVariables
[$part1];
1862 $this->mOutput
->mContainsOldMagic
= true;
1865 # Template table test
1867 # Did we encounter this template already? If yes, it is in the cache
1868 # and we need to check for loops.
1869 if ( isset( $this->mTemplates
[$part1] ) ) {
1870 # Infinite loop test
1871 if ( isset( $this->mTemplatePath
[$part1] ) ) {
1875 # set $text to cached message.
1876 $text = $this->mTemplates
[$part1];
1880 # Load from database
1882 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1883 if ( !is_null( $title ) && !$title->isExternal() ) {
1884 # Check for excessive inclusion
1885 $dbk = $title->getPrefixedDBkey();
1886 if ( $this->incrementIncludeCount( $dbk ) ) {
1887 # This should never be reached.
1888 $article = new Article( $title );
1889 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1890 if ( $articleContent !== false ) {
1892 $text = $articleContent;
1897 # If the title is valid but undisplayable, make a link to it
1898 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1899 $text = '[[' . $title->getPrefixedText() . ']]';
1903 # Template cache array insertion
1904 $this->mTemplates
[$part1] = $text;
1908 # Recursive parsing, escaping and link table handling
1909 # Only for HTML output
1910 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1911 $text = wfEscapeWikiText( $text );
1912 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1913 # Clean up argument array
# Arguments without '=' get consecutive numeric keys; "name=value"
# arguments are stored under the trimmed name.
1914 $assocArgs = array();
1916 foreach( $args as $arg ) {
1917 $eqpos = strpos( $arg, '=' );
1918 if ( $eqpos === false ) {
1919 $assocArgs[$index++
] = $arg;
1921 $name = trim( substr( $arg, 0, $eqpos ) );
1922 $value = trim( substr( $arg, $eqpos+
1 ) );
# NOTE(review): $value and $name come straight out of trim(), which returns
# a string, so the === false / !== false tests below look like they can
# never change the outcome - confirm.
1923 if ( $value === false ) {
1926 if ( $name !== false ) {
1927 $assocArgs[$name] = $value;
1932 # Do not enter included links in link table
1933 if ( !is_null( $title ) ) {
1934 $wgLinkCache->suspend();
1937 # Add a new element to the templace recursion path
1938 $this->mTemplatePath
[$part1] = 1;
1940 $text = $this->stripParse( $text, $newline, $assocArgs );
1942 # Resume the link cache and register the inclusion as a link
1943 if ( !is_null( $title ) ) {
1944 $wgLinkCache->resume();
1945 $wgLinkCache->addLinkObj( $title );
1948 # Empties the template path
# The whole path array is reset here, not just the entry for $part1.
1949 $this->mTemplatePath
= array();
# Triple brace replacement -- used for template arguments.
#
# Callback for {{{name}}} matches. The argument set on top of
# $this->mArgStack is consulted.
#
# @param array $matches preg match: [0] whole match, [1] optional preceding
#                       newline, [2] argument name (trimmed before lookup).
# @return string The parsed argument value when the name is defined,
#                otherwise the matched text unchanged.
function argSubstitution( $matches ) {
	$newline = $matches[1];
	$arg = trim( $matches[2] );
	$text = $matches[0];
	$inputArgs = end( $this->mArgStack );

	if ( array_key_exists( $arg, $inputArgs ) ) {
		$text = $this->stripParse( $inputArgs[$arg], $newline, array() );
	}

	return $text;
}
# Returns true if the function is allowed to include this entity.
#
# Every call counts as one inclusion of $dbk; the counter is created on
# first use and is incremented even when the limit has been exceeded.
#
# @param string $dbk Key identifying the included entity.
# @return bool True while the running count for $dbk does not exceed
#              MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	return ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1985 # Cleans up HTML, removes dangerous tags and attributes
#
# The text is split on '<'; each piece is matched against a tag regex and
# kept as a tag only when the lower-cased tag name is in the allowed element
# list, with its attributes filtered through fixTagAttributes(). Everything
# else is re-emitted with the '<' put back as text. Two loops are visible:
# one that tracks open tags on $tagstack / $tablestack and closes leftovers,
# and a simpler one without nesting checks.
#
# NOTE(review): the embedded original line numbers jump (e.g. 1989 -> 1992
# and 2006 -> 2010), so the conditions choosing between the two tag-list
# setups and between the two loops, several array terminators, the $badtag
# handling, closing braces and the return are not visible in this listing.
1986 /* private */ function removeHTMLtags( $text ) {
1987 global $wgUseTidy, $wgUserHtml;
1988 $fname = 'Parser::removeHTMLtags';
1989 wfProfileIn( $fname );
1992 $htmlpairs = array( # Tags that must be closed
1993 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1994 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1995 'strike', 'strong', 'tt', 'var', 'div', 'center',
1996 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1997 'ruby', 'rt' , 'rb' , 'rp', 'p'
1999 $htmlsingle = array(
2000 'br', 'hr', 'li', 'dt', 'dd'
2002 $htmlnest = array( # Tags that can be nested--??
2003 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2004 'dl', 'font', 'big', 'small', 'sub', 'sup'
2006 $tabletags = array( # Can only appear inside table
2010 $htmlpairs = array();
2011 $htmlsingle = array();
2012 $htmlnest = array();
2013 $tabletags = array();
# Table tags are treated as single tags; the allowed element list is the
# union of single and paired tags.
2016 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2017 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2019 $htmlattrs = $this->getHTMLattrs () ;
2021 # Remove HTML comments
# NOTE(review): the replacement is '$2', but the pattern has only one
# capturing group, so each match is replaced by an empty string - confirm
# that is the intent.
2022 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
2024 $bits = explode( '<', $text );
2025 $text = array_shift( $bits );
2027 $tagstack = array(); $tablestack = array();
2028 foreach ( $bits as $x ) {
# Notices and warnings are silenced while the piece is matched and unpacked.
2029 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
2030 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2032 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2033 error_reporting( $prev );
2036 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2040 if ( ! in_array( $t, $htmlsingle ) &&
2041 ( $ot = @array_pop
( $tagstack ) ) != $t ) {
2042 @array_push
( $tagstack, $ot );
# Leaving a table restores the tag stack that was saved when it was opened.
2045 if ( $t == 'table' ) {
2046 $tagstack = array_pop( $tablestack );
2051 # Keep track for later
2052 if ( in_array( $t, $tabletags ) &&
2053 ! in_array( 'table', $tagstack ) ) {
2055 } else if ( in_array( $t, $tagstack ) &&
2056 ! in_array ( $t , $htmlnest ) ) {
2058 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table saves the current tag stack and starts a fresh one.
2059 if ( $t == 'table' ) {
2060 array_push( $tablestack, $tagstack );
2061 $tagstack = array();
2063 array_push( $tagstack, $t );
2065 # Strip non-approved attributes from the tag
2066 $newparams = $this->fixTagAttributes($params);
# NOTE(review): the str_replace() calls in this function replace '>' with
# '>', which is a no-op as written; presumably the replacement was an HTML
# entity that got decoded - confirm against the upstream file.
2070 $rest = str_replace( '>', '>', $rest );
2071 $text .= "<$slash$t $newparams$brace$rest";
2075 $text .= '<' . str_replace( '>', '>', $x);
2077 # Close off any remaining tags
2078 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2080 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2083 # this might be possible using tidy itself
2084 foreach ( $bits as $x ) {
2085 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2087 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2088 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2089 $newparams = $this->fixTagAttributes($params);
2090 $rest = str_replace( '>', '>', $rest );
2091 $text .= "<$slash$t $newparams$brace$rest";
2093 $text .= '<' . str_replace( '>', '>', $x);
2097 wfProfileOut( $fname );
2104 * This function accomplishes several tasks:
2105 * 1) Auto-number headings if that option is enabled
2106 * 2) Add an [edit] link to sections for logged in users who have enabled the option
2107 * 3) Add a Table of contents on the top for users who have enabled the option
2108 * 4) Auto-anchor headings
2110 * It loops through all headlines, collects the necessary data, then splits up the
2111 * string and re-inserts the newly formatted headlines.
# Post-processes the <h1>..<h6> headings found in the HTML in $text:
# optional numbering, anchors, [edit] section links and the table of
# contents. All headings are collected with preg_match_all(), a replacement
# is built for each in $head[], then $text is split at the headings and
# reassembled. $isMain is consulted when deciding whether to place the TOC
# in front of the first block.
#
# NOTE(review): the embedded original line numbers jump (e.g. 2121 -> 2123),
# so many statements (assignments inside the option checks, initialisation of
# $toc / $toclevel / $level / $headlineCount / $i / $full, the assembly of
# $full, closing braces) are not visible in this listing.
2115 /* private */ function formatHeadings( $text, $isMain=true ) {
2116 global $wgInputEncoding, $wgMaxTocLevel;
2118 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
2119 $doShowToc = $this->mOptions
->getShowToc();
2120 $forceTocHere = false;
2121 if( !$this->mTitle
->userCanEdit() ) {
2123 $rightClickHack = 0;
2125 $showEditLink = $this->mOptions
->getEditSection();
2126 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
2129 # Inhibit editsection links if requested in the page
2130 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
2131 if( $esw->matchAndRemove( $text ) ) {
2134 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2136 $mw =& MagicWord
::get( MAG_NOTOC
);
2137 if( $mw->matchAndRemove( $text ) ) {
2141 # never add the TOC to the Main Page. This is an entry page that should not
2142 # be more than 1-2 screens large anyway
2143 if( $this->mTitle
->getPrefixedText() == wfMsg('mainpage') ) {
2147 # Get all headlines for numbering them and adding funky stuff like [edit]
2148 # links - this is for later, but we need the number of headlines right now
# $matches[1] = heading level digit, $matches[2] = rest of the opening tag
# including '>', $matches[3] = heading content.
2149 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2151 # if there are fewer than 4 headlines in the article, do not show TOC
2152 if( $numMatches < 4 ) {
2156 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2157 # override above conditions and always show TOC at that place
2158 $mw =& MagicWord
::get( MAG_TOC
);
2159 if ($mw->match( $text ) ) {
2161 $forceTocHere = true;
2163 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2164 # override above conditions and always show TOC above first header
2165 $mw =& MagicWord
::get( MAG_FORCETOC
);
2166 if ($mw->matchAndRemove( $text ) ) {
2173 # We need this to perform operations on the HTML
2174 $sk =& $this->mOptions
->getSkin();
2179 # Ugh .. the TOC should have neat indentation levels which can be
2180 # passed to the skin functions. These are determined here
2185 $sublevelCount = array();
# One pass per heading: work out the level change, the numbering string, the
# anchor and the replacement markup.
2188 foreach( $matches[3] as $headline ) {
2191 $prevlevel = $level;
2193 $level = $matches[1][$headlineCount];
2194 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
2195 # reset when we enter a new level
2196 $sublevelCount[$level] = 0;
2197 $toc .= $sk->tocIndent( $level - $prevlevel );
2198 $toclevel +
= $level - $prevlevel;
2200 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
2201 # reset when we step back a level
2202 $sublevelCount[$level+
1]=0;
2203 $toc .= $sk->tocUnindent( $prevlevel - $level );
2204 $toclevel -= $prevlevel - $level;
2206 # count number of headlines for each level
2207 @$sublevelCount[$level]++
;
2208 if( $doNumberHeadings ||
$doShowToc ) {
2210 for( $i = 1; $i <= $level; $i++
) {
2211 if( !empty( $sublevelCount[$i] ) ) {
2215 $numbering .= $sublevelCount[$i];
2221 # The canonized header is a version of the header text safe to use for links
2222 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
# NOTE(review): the second call is again given $headline, not the result of
# the first, so the value returned by unstrip() is overwritten unused -
# confirm whether unstripNoWiki() should receive $canonized_headline.
2223 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
2224 $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState
);
# Strip tags, then derive the TOC text and the URL-encoded anchor from it.
2227 $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2228 $tocline = trim( $canonized_headline );
2229 $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT
, $wgInputEncoding ) );
2230 $replacearray = array(
2234 $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2235 $refer[$headlineCount] = $canonized_headline;
2237 # count how many in assoc. array so we can track dupes in anchors
2238 @$refers[$canonized_headline]++
;
2239 $refcount[$headlineCount]=$refers[$canonized_headline];
2241 # Prepend the number to the heading text
2243 if( $doNumberHeadings ||
$doShowToc ) {
2244 $tocline = $numbering . ' ' . $tocline;
2246 # Don't number the heading if it is the only one (looks silly)
2247 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2248 # the two are different if the line contains a link
2249 $headline=$numbering . ' ' . $headline;
2253 # Create the anchor for linking from the TOC to the section
# Repeated headings get "_<n>" appended from the second occurrence on.
2254 $anchor = $canonized_headline;
2255 if($refcount[$headlineCount] > 1 ) {
2256 $anchor .= '_' . $refcount[$headlineCount];
2258 if( $doShowToc && ( !isset($wgMaxTocLevel) ||
$toclevel<$wgMaxTocLevel ) ) {
2259 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2261 if( $showEditLink ) {
2262 if ( empty( $head[$headlineCount] ) ) {
2263 $head[$headlineCount] = '';
2265 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
2268 # Add the edit section span
2269 if( $rightClickHack ) {
2270 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
2273 # give headline the correct <h#> tag
2274 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
2280 $toclines = $headlineCount;
2281 $toc .= $sk->tocUnindent( $toclevel );
2282 $toc = $sk->tocTable( $toc );
2285 # split up and insert constructed headlines
2287 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2290 foreach( $blocks as $block ) {
2291 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2292 # This is the [edit] link that appears for the top block of text when
2293 # section editing is enabled
2295 # Disabled because it broke block formatting
2296 # For example, a bullet point in the top line
2297 # $full .= $sk->editSectionLink(0);
2300 if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2301 # Top anchor now in skin
2305 if( !empty( $head[$i] ) ) {
# The MAG_TOC magic word's replace() is applied to $full with the generated
# TOC as replacement text.
2311 $mw =& MagicWord
::get( MAG_TOC
);
2312 return $mw->replace( $toc, $full );
2318 # Return an HTML link for the "ISBN 123456" text
#
# $text is split on the literal 'ISBN '; for each following piece the leading
# blanks and then the leading run of characters from $valid are consumed
# one character at a time. Dashes and spaces are removed to form $num, which
# is passed as "isbn=" to the local URL of the Booksources special page; the
# link text keeps the ISBN as collected in $isbn.
#
# NOTE(review): the embedded original line numbers jump (e.g. 2326 -> 2329),
# so the early return, the statements that fill $blank and $isbn, the
# condition that selects the plain-text branch, closing braces and the final
# return are not visible in this listing.
2319 /* private */ function magicISBN( $text ) {
2321 $fname = 'Parser::magicISBN';
2322 wfProfileIn( $fname );
# NOTE(review): split() was removed from PHP in version 7; explode() would
# be the drop-in for a literal separator - confirm the target PHP version.
# A space is prepended before splitting and removed again from the first
# piece below.
2324 $a = split( 'ISBN ', " $text" );
2325 if ( count ( $a ) < 2 ) {
2326 wfProfileOut( $fname );
2329 $text = substr( array_shift( $a ), 1);
2330 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2332 foreach ( $a as $x ) {
2333 $isbn = $blank = '' ;
# NOTE(review): $x{0} is read without a length check, and the {} offset
# syntax is not accepted by PHP 8.
2334 while ( ' ' == $x{0} ) {
2336 $x = substr( $x, 1 );
2338 while ( strstr( $valid, $x{0} ) != false ) {
2340 $x = substr( $x, 1 );
2342 $num = str_replace( '-', '', $isbn );
2343 $num = str_replace( ' ', '', $num );
2346 $text .= "ISBN $blank$x";
2348 $titleObj = Title
::makeTitle( NS_SPECIAL
, 'Booksources' );
2349 $text .= '<a href="' .
2350 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
2351 "\" class=\"internal\">ISBN $isbn</a>";
2355 wfProfileOut( $fname );
2359 # Return an HTML link for the "GEO ..." text
2360 /* private */ function magicGEO( $text ) {
2361 global $wgLang, $wgUseGeoMode;
2362 $fname = 'Parser::magicGEO';
2363 wfProfileIn( $fname );
2365 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2366 $directions = array ( "N" => "North" , "S" => "South" , "E" => "East" , "W" => "West" ) ;
2367 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2368 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2369 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2370 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2372 $a = split( 'GEO ', " $text" );
2373 if ( count ( $a ) < 2 ) {
2374 wfProfileOut( $fname );
2377 $text = substr( array_shift( $a ), 1);
2378 $valid = '0123456789.+-:';
2380 foreach ( $a as $x ) {
2381 $geo = $blank = '' ;
2382 while ( ' ' == $x{0} ) {
2384 $x = substr( $x, 1 );
2386 while ( strstr( $valid, $x{0} ) != false ) {
2388 $x = substr( $x, 1 );
2390 $num = str_replace( '+', '', $geo );
2391 $num = str_replace( ' ', '', $num );
2393 if ( '' == $num ||
count ( explode ( ":" , $num , 3 ) ) < 2 ) {
2394 $text .= "GEO $blank$x";
2396 $titleObj = Title
::makeTitle( NS_SPECIAL
, 'Geo' );
2397 $text .= '<a href="' .
2398 $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
2399 "\" class=\"internal\">GEO $geo</a>";
2403 wfProfileOut( $fname );
2407 # Return an HTML link for the "RFC 1234" text
2408 /* private */ function magicRFC( $text ) {
2411 $a = split( 'RFC ', ' '.$text );
2412 if ( count ( $a ) < 2 ) return $text;
2413 $text = substr( array_shift( $a ), 1);
2414 $valid = '0123456789';
2416 foreach ( $a as $x ) {
2417 $rfc = $blank = '' ;
2418 while ( ' ' == $x{0} ) {
2420 $x = substr( $x, 1 );
2422 while ( strstr( $valid, $x{0} ) != false ) {
2424 $x = substr( $x, 1 );
2428 $text .= "RFC $blank$x";
2430 $url = wfmsg( 'rfcurl' );
2431 $url = str_replace( '$1', $rfc, $url);
2432 $sk =& $this->mOptions
->getSkin();
2433 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
2434 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2440 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2441 $this->mOptions
= $options;
2442 $this->mTitle
=& $title;
2443 $this->mOutputType
= OT_WIKI
;
2445 if ( $clearState ) {
2446 $this->clearState();
2449 $stripState = false;
2453 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2457 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2458 "/<br *?>/i" => "<br />",
2460 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2462 $text = $this->strip( $text, $stripState, false );
2463 $text = $this->pstPass2( $text, $user );
2464 $text = $this->unstrip( $text, $stripState );
2465 $text = $this->unstripNoWiki( $text, $stripState );
# Second pass of the pre-save transform. In order, it:
#   1. runs replaceVariables() (with mOutputType == OT_WIKI only {{subst:xxx}} is processed),
#   2. replaces ~~~, ~~~~ and ~~~~~ with the user link and/or a timestamp,
#   3. completes context links such as [[|name]] and [[name (context)|]],
#   4. runs the MAG_SUBST callback pass,
#   5. trims trailing whitespace and removes the MAG_END marker.
#
# $text  wikitext being saved
# $user  user doing the save; supplies getName() and the 'nickname' option
# Returns the transformed wikitext.
#
# NOTE(review): the excerpt this was reviewed from had lost its short lines
# (closing braces, the $context assignments, "} else {", the final return).
# They are restored here from the surrounding logic -- confirm against the
# full file before merging.
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
	  ' (' . date( 'T' ) . ')';
	# Put back the TZ value saved above. This used to read the undefined
	# $oldtzs, which set TZ to an empty string instead of restoring it.
	if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

	# The tilde runs are literal text, so use plain string replacement:
	# preg_replace() would interpret "$1" or "\1" inside a user name or
	# nickname as a backreference and mangle the signature.
	# Longest run first, so ~~~~~ is not consumed as ~~~~ or ~~~.
	$userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# NOTE(review): the lines directly before and after this block were missing
	# from the reviewed excerpt -- confirm it is meant to be live code and is
	# not wrapped in a block comment in the full file.
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Prime the members that parse() would normally initialise, so that code
# outside the parser can call individual class members directly.
# $title is stored by reference. The parser state is reset through
# clearState() unless $clearState is passed as false.
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2548 function transformMsg( $text, $options ) {
2550 static $executing = false;
2552 # Guard against infinite recursion
2558 $this->mTitle
= $wgTitle;
2559 $this->mOptions
= $options;
2560 $this->mOutputType
= OT_MSG
;
2561 $this->clearState();
2562 $text = $this->replaceVariables( $text );
2568 # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2569 # Callback will be called with the text within
2570 # Transform and return the text within
2571 function setHook( $tag, $callback ) {
2572 $oldVal = @$this->mTagHooks
[$tag];
2573 $this->mTagHooks
[$tag] = $callback;
2580 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
2581 var $mCacheTime; # Used in ParserCache
# Constructor. Stores the rendered text together with the language links,
# category links and the "contains old magic" flag it is given; the cache
# time always starts out as an empty string.
function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
	$containsOldMagic = false )
{
	# Link collections
	$this->mLanguageLinks = $languageLinks;
	$this->mCategoryLinks = $categoryLinks;

	# Output text and flags
	$this->mText = $text;
	$this->mContainsOldMagic = $containsOldMagic;

	# Not cached yet
	$this->mCacheTime = "";
}
# Read accessors: each one returns the corresponding member unchanged.
function getText() {
	return $this->mText;
}

function getLanguageLinks() {
	return $this->mLanguageLinks;
}

function getCategoryLinks() {
	return $this->mCategoryLinks;
}

function getCacheTime() {
	return $this->mCacheTime;
}

function containsOldMagic() {
	return $this->mContainsOldMagic;
}

# Write accessors: each one passes the member and the new value to wfSetVar()
# and returns whatever wfSetVar() returns.
# NOTE(review): wfSetVar() is defined outside this section; presumably it
# assigns the value and returns the previous one -- confirm.
function setText( $text ) {
	return wfSetVar( $this->mText, $text );
}

function setLanguageLinks( $ll ) {
	return wfSetVar( $this->mLanguageLinks, $ll );
}

function setCategoryLinks( $cl ) {
	return wfSetVar( $this->mCategoryLinks, $cl );
}

function setContainsOldMagic( $com ) {
	return wfSetVar( $this->mContainsOldMagic, $com );
}

function setCacheTime( $t ) {
	return wfSetVar( $this->mCacheTime, $t );
}
# Fold the link data of another output object into this one.
#
# $other  object exposing mLanguageLinks, mCategoryLinks and mContainsOldMagic
#
# Language links and category links from $other are appended to our own
# lists with array_merge(); the old-magic flag becomes true if it is set on
# either side. mText and mCacheTime are left untouched. Returns nothing.
function merge( $other ) {
	$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );

	# Take the category links from $other. This used to append our own
	# language links to the category list, so $other's categories were
	# dropped and language links leaked into mCategoryLinks.
	$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );

	$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
}
2614 # All variables are private
2615 var $mUseTeX; # Use texvc to expand <math> tags
2616 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
2617 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2618 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2619 var $mAllowExternalImages; # Allow external images inline
2620 var $mSkin; # Reference to the preferred skin
2621 var $mDateFormat; # Date format index
2622 var $mEditSection; # Create "edit section" links
2623 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2624 var $mNumberHeadings; # Automatically number headings
2625 var $mShowToc; # Show table of contents
# Read accessors for the parser options: each one returns the matching
# member unchanged.
function getUseTeX() {
	return $this->mUseTeX;
}

function getUseCategoryMagic() {
	return $this->mUseCategoryMagic;
}

function getUseDynamicDates() {
	return $this->mUseDynamicDates;
}

function getInterwikiMagic() {
	return $this->mInterwikiMagic;
}

function getAllowExternalImages() {
	return $this->mAllowExternalImages;
}

function getSkin() {
	return $this->mSkin;
}

function getDateFormat() {
	return $this->mDateFormat;
}

function getEditSection() {
	return $this->mEditSection;
}

function getEditSectionOnRightClick() {
	return $this->mEditSectionOnRightClick;
}

function getNumberHeadings() {
	return $this->mNumberHeadings;
}

function getShowToc() {
	return $this->mShowToc;
}
# Write accessors for the parser options: each one passes the member and the
# new value to wfSetVar() and returns whatever wfSetVar() returns.
# NOTE(review): wfSetVar() is defined outside this section; presumably it
# assigns the value and returns the previous one -- confirm.
function setUseTeX( $x ) {
	return wfSetVar( $this->mUseTeX, $x );
}

function setUseCategoryMagic( $x ) {
	return wfSetVar( $this->mUseCategoryMagic, $x );
}

function setUseDynamicDates( $x ) {
	return wfSetVar( $this->mUseDynamicDates, $x );
}

function setInterwikiMagic( $x ) {
	return wfSetVar( $this->mInterwikiMagic, $x );
}

function setAllowExternalImages( $x ) {
	return wfSetVar( $this->mAllowExternalImages, $x );
}

function setDateFormat( $x ) {
	return wfSetVar( $this->mDateFormat, $x );
}

function setEditSection( $x ) {
	return wfSetVar( $this->mEditSection, $x );
}

function setEditSectionOnRightClick( $x ) {
	return wfSetVar( $this->mEditSectionOnRightClick, $x );
}

function setNumberHeadings( $x ) {
	return wfSetVar( $this->mNumberHeadings, $x );
}

function setShowToc( $x ) {
	return wfSetVar( $this->mShowToc, $x );
}

# The skin is bound by reference rather than going through wfSetVar();
# nothing is returned.
function setSkin( &$x ) {
	$this->mSkin =& $x;
}
2652 # Get parser options
2653 /* static */ function newFromUser( &$user ) {
2654 $popts = new ParserOptions
;
2655 $popts->initialiseFromUser( $user );
2660 function initialiseFromUser( &$userInput ) {
2661 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2663 if ( !$userInput ) {
2665 $user->setLoaded( true );
2667 $user =& $userInput;
2670 $this->mUseTeX
= $wgUseTeX;
2671 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
2672 $this->mUseDynamicDates
= $wgUseDynamicDates;
2673 $this->mInterwikiMagic
= $wgInterwikiMagic;
2674 $this->mAllowExternalImages
= $wgAllowExternalImages;
2675 $this->mSkin
=& $user->getSkin();
2676 $this->mDateFormat
= $user->getOption( 'date' );
2677 $this->mEditSection
= $user->getOption( 'editsection' );
2678 $this->mEditSectionOnRightClick
= $user->getOption( 'editsectiononrightclick' );
2679 $this->mNumberHeadings
= $user->getOption( 'numberheadings' );
2680 $this->mShowToc
= $user->getOption( 'showtoc' );
# Regex callbacks, used in Parser::replaceVariables
# Each one forwards the match array to the parser held in $wgCurParser.

# Forwards to $wgCurParser->braceSubstitution() and returns its result.
function wfBraceSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
# Forwards the match array to $wgCurParser->argSubstitution() and returns
# its result.
function wfArgSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
# Forwards the match array to $wgCurParser->variableSubstitution() and
# returns its result.
function wfVariableSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2702 # Return the total number of articles
2703 function wfNumberOfArticles() {
2704 global $wgNumberOfArticles;
2707 return $wgNumberOfArticles;
2710 # Get various statistics from the database
2711 /* private */ function wfLoadSiteStats() {
2712 global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
2713 $fname = 'wfLoadSiteStats';
2715 if ( -1 != $wgNumberOfArticles ) return;
2716 $dbr =& wfGetDB( DB_SLAVE
);
2717 $s = $dbr->getArray( 'site_stats',
2718 array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
2719 array( 'ss_row_id' => 1 ), $fname
2722 if ( $s === false ) {
2725 $wgTotalViews = $s->ss_total_views
;
2726 $wgTotalEdits = $s->ss_total_edits
;
2727 $wgNumberOfArticles = $s->ss_good_articles
;
2731 function wfEscapeHTMLTagsOnly( $in ) {
2733 array( '"', '>', '<' ),
2734 array( '"', '>', '<' ),