4 * File for Parser and related classes
10 * Update this version number when the ParserOutput format
11 * changes in an incompatible way, so the parser cache
12 * can automatically discard old data.
14 define( 'MW_PARSER_VERSION', '1.4.0' );
17 * Variable substitution O(N^2) attack
19 * Without countermeasures, it would be possible to attack the parser by saving
20 * a page filled with a large number of inclusions of large pages. The size of
21 * the generated page would be proportional to the square of the input size.
22 * Hence, we limit the number of inclusions of any given page, thus bringing any
23 * attack back to O(N).
26 define( 'MAX_INCLUDE_REPEAT', 100 );
27 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
29 define( 'RLH_FOR_UPDATE', 1 );
31 # Allowed values for $mOutputType
32 define( 'OT_HTML', 1 );
33 define( 'OT_WIKI', 2 );
34 define( 'OT_MSG' , 3 );
36 # string parameter for extractTags which will cause it
37 # to strip HTML comments in addition to regular
38 # <XML>-style tags. This should not be anything we
39 # may want to use in wikisyntax
40 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
42 # prefix for escaping, used in two functions at least
43 define( 'UNIQ_PREFIX', 'NaodW29');
45 # Constants needed for external link processing
46 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
47 define( 'HTTP_PROTOCOLS', 'http|https' );
48 # Everything except bracket, space, or control characters
49 define( 'EXT_LINK_URL_CLASS', '[^]<>"\\x00-\\x20\\x7F]' );
51 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
52 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
53 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
54 define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS
.'):'.EXT_LINK_URL_CLASS
.'+) *('.EXT_LINK_TEXT_CLASS
.'*?)\]/S' );
55 define( 'EXT_IMAGE_REGEX',
56 '/^('.HTTP_PROTOCOLS
.':)'. # Protocol
57 '('.EXT_LINK_URL_CLASS
.'+)\\/'. # Hostname and path
58 '('.EXT_IMAGE_FNAME_CLASS
.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS
.')$/S' # Filename
64 * Processes wiki markup
67 * There are three main entry points into the Parser class:
69 * produces HTML output
71 * produces altered wiki markup.
73 * performs brace substitution on MediaWiki messages
76 * objects: $wgLang, $wgDateFormatter, $wgLinkCache
78 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
81 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
82 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
85 * * only within ParserOptions
98 # Cleared with clearState():
99 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
100 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
103 var $mOptions, $mTitle, $mOutputType,
104 $mTemplates, // cache of already loaded templates, avoids
105 // multiple SQL queries for the same string
106 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
107 // in this path. Used for loop detection.
117 $this->mTemplates
= array();
118 $this->mTemplatePath
= array();
119 $this->mTagHooks
= array();
128 function clearState() {
129 $this->mOutput
= new ParserOutput
;
130 $this->mAutonumber
= 0;
131 $this->mLastSection
= "";
132 $this->mDTopen
= false;
133 $this->mVariables
= false;
134 $this->mIncludeCount
= array();
135 $this->mStripState
= array();
136 $this->mArgStack
= array();
137 $this->mInPre
= false;
141 * First pass--just handle <nowiki> sections, pass the rest off
142 * to internalParse() which does all the real work.
145 * @return ParserOutput a ParserOutput
147 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
148 global $wgUseTidy, $wgContLang;
149 $fname = 'Parser::parse';
150 wfProfileIn( $fname );
156 $this->mOptions
= $options;
157 $this->mTitle
=& $title;
158 $this->mOutputType
= OT_HTML
;
161 $text = $this->strip( $text, $this->mStripState
);
163 $text = $this->internalParse( $text, $linestart );
164 $text = $this->unstrip( $text, $this->mStripState
);
165 # Clean up special characters, only run once, next-to-last before doBlockLevels
168 # french spaces, last one Guillemet-left
169 # only if there is something before the space
170 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1 \\2',
171 # french spaces, Guillemet-right
172 "/(\\302\\253) /i"=>"\\1 ",
173 '/<hr *>/i' => '<hr />',
174 '/<br *>/i' => '<br />',
175 '/<center *>/i' => '<div class="center">',
176 '/<\\/center *>/i' => '</div>',
177 # Clean up spare ampersands; note that we probably ought to be
178 # more careful about named entities.
179 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
181 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
184 # french spaces, last one Guillemet-left
185 '/ (\\?|:|;|!|\\302\\273)/i' => ' \\1',
186 # french spaces, Guillemet-right
187 '/(\\302\\253) /i' => '\\1 ',
188 '/<center *>/i' => '<div class="center">',
189 '/<\\/center *>/i' => '</div>'
191 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
194 $text = $this->doBlockLevels( $text, $linestart );
196 $this->replaceLinkHolders( $text );
197 $text = $wgContLang->convert($text);
199 $text = $this->unstripNoWiki( $text, $this->mStripState
);
202 $text = Parser
::tidy($text);
205 $this->mOutput
->setText( $text );
206 wfProfileOut( $fname );
207 return $this->mOutput
;
211 * Get a random string
216 function getRandomString() {
217 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
221 * Replaces all occurrences of <$tag>content</$tag> in the text
222 * with a random marker and returns the new text. The output parameter
223 * $content will be an associative array filled with data of the form
224 * $unique_marker => content.
226 * If $content is already set, the additional entries will be appended
227 * If $tag is set to STRIP_COMMENTS, the function will extract
228 * <!-- HTML comments -->
233 function extractTags($tag, $text, &$content, $uniq_prefix = ''){
234 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
241 while ( '' != $text ) {
242 if($tag==STRIP_COMMENTS
) {
243 $p = preg_split( '/<!--/i', $text, 2 );
245 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
248 if ( ( count( $p ) < 2 ) ||
( '' == $p[1] ) ) {
251 if($tag==STRIP_COMMENTS
) {
252 $q = preg_split( '/-->/i', $p[1], 2 );
254 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
256 $marker = $rnd . sprintf('%08X', $n++
);
257 $content[$marker] = $q[0];
258 $stripped .= $marker;
266 * Strips and renders nowiki, pre, math, hiero
267 * If $render is set, performs necessary rendering operations on plugins
268 * Returns the text, and fills an array with data needed in unstrip()
269 * If the $state is already a valid strip state, it adds to the state
271 * @param bool $stripcomments when set, HTML comments <!-- like this -->
272 * will be stripped in addition to other tags. This is important
273 * for section editing, where these comments cause confusion when
274 * counting the sections in the wikisource
278 function strip( $text, &$state, $stripcomments = false ) {
279 $render = ($this->mOutputType
== OT_HTML
);
280 $html_content = array();
281 $nowiki_content = array();
282 $math_content = array();
283 $pre_content = array();
284 $comment_content = array();
285 $ext_content = array();
286 $gallery_content = array();
288 # Replace any instances of the placeholders
289 $uniq_prefix = UNIQ_PREFIX
;
290 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
293 global $wgRawHtml, $wgWhitelistEdit;
294 if( $wgRawHtml && $wgWhitelistEdit ) {
295 $text = Parser
::extractTags('html', $text, $html_content, $uniq_prefix);
296 foreach( $html_content as $marker => $content ) {
298 # Raw and unchecked for validity.
299 $html_content[$marker] = $content;
301 $html_content[$marker] = '<html>'.$content.'</html>';
307 $text = Parser
::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
308 foreach( $nowiki_content as $marker => $content ) {
310 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
312 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
317 $text = Parser
::extractTags('math', $text, $math_content, $uniq_prefix);
318 foreach( $math_content as $marker => $content ){
320 if( $this->mOptions
->getUseTeX() ) {
321 $math_content[$marker] = renderMath( $content );
323 $math_content[$marker] = '<math>'.$content.'<math>';
326 $math_content[$marker] = '<math>'.$content.'</math>';
331 $text = Parser
::extractTags('pre', $text, $pre_content, $uniq_prefix);
332 foreach( $pre_content as $marker => $content ){
334 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
336 $pre_content[$marker] = '<pre>'.$content.'</pre>';
341 $text = Parser
::extractTags('gallery', $text, $gallery_content, $uniq_prefix);
342 foreach( $gallery_content as $marker => $content ) {
343 require_once( 'ImageGallery.php' );
345 $gallery_content[$marker] = Parser
::renderImageGallery( $content );
347 $gallery_content[$marker] = '<gallery>'.$content.'</gallery>';
353 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
354 foreach( $comment_content as $marker => $content ){
355 $comment_content[$marker] = '<!--'.$content.'-->';
360 foreach ( $this->mTagHooks
as $tag => $callback ) {
361 $ext_contents[$tag] = array();
362 $text = Parser
::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
363 foreach( $ext_content[$tag] as $marker => $content ) {
365 $ext_content[$tag][$marker] = $callback( $content );
367 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
372 # Merge state with the pre-existing state, if there is one
374 $state['html'] = $state['html'] +
$html_content;
375 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
376 $state['math'] = $state['math'] +
$math_content;
377 $state['pre'] = $state['pre'] +
$pre_content;
378 $state['comment'] = $state['comment'] +
$comment_content;
379 $state['gallery'] = $state['gallery'] +
$gallery_content;
381 foreach( $ext_content as $tag => $array ) {
382 if ( array_key_exists( $tag, $state ) ) {
383 $state[$tag] = $state[$tag] +
$array;
388 'html' => $html_content,
389 'nowiki' => $nowiki_content,
390 'math' => $math_content,
391 'pre' => $pre_content,
392 'comment' => $comment_content,
393 'gallery' => $gallery_content,
400 * restores pre, math, and hiero removed by strip()
402 * always call unstripNoWiki() after this one
405 function unstrip( $text, &$state ) {
406 # Must expand in reverse order, otherwise nested tags will be corrupted
407 $contentDict = end( $state );
408 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
409 if( key($state) != 'nowiki' && key($state) != 'html') {
410 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
411 $text = str_replace( key( $contentDict ), $content, $text );
420 * always call this after unstrip() to preserve the order
424 function unstripNoWiki( $text, &$state ) {
425 # Must expand in reverse order, otherwise nested tags will be corrupted
426 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
427 $text = str_replace( key( $state['nowiki'] ), $content, $text );
432 for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
433 $text = str_replace( key( $state['html'] ), $content, $text );
441 * Add an item to the strip state
442 * Returns the unique tag which must be inserted into the stripped text
443 * The tag will be replaced with the original text in unstrip()
447 function insertStripItem( $text, &$state ) {
448 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
457 $state['item'][$rnd] = $text;
462 * Return allowed HTML attributes
466 function getHTMLattrs () {
467 $htmlattrs = array( # Allowed attributes--no scripting, etc.
468 'title', 'align', 'lang', 'dir', 'width', 'height',
469 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
470 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
471 /* FONT */ 'type', 'start', 'value', 'compact',
472 /* For various lists, mostly deprecated but safe */
473 'summary', 'width', 'border', 'frame', 'rules',
474 'cellspacing', 'cellpadding', 'valign', 'char',
475 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
476 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
477 'id', 'class', 'name', 'style' /* For CSS */
483 * Remove non approved attributes and javascript in css
487 function fixTagAttributes ( $t ) {
488 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
489 $htmlattrs = $this->getHTMLattrs() ;
491 # Strip non-approved attributes from the tag
493 '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
494 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
497 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
499 # Strip javascript "expression" from stylesheets. Brute force approach:
500 # If anything offensive is found, all attributes of the HTML tag are dropped
503 '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
504 wfMungeToUtf8( $t ) ) )
513 * interface with html tidy, used if $wgUseTidy = true
518 function tidy ( $text ) {
519 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
520 global $wgInputEncoding, $wgOutputEncoding;
521 $fname = 'Parser::tidy';
522 wfProfileIn( $fname );
526 switch(strtoupper($wgOutputEncoding)) {
528 $opts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
531 $opts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
537 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
538 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
539 '<head><title>test</title></head><body>'.$text.'</body></html>';
540 $descriptorspec = array(
541 0 => array('pipe', 'r'),
542 1 => array('pipe', 'w'),
543 2 => array('file', '/dev/null', 'a')
545 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
546 if (is_resource($process)) {
547 fwrite($pipes[0], $wrappedtext);
549 while (!feof($pipes[1])) {
550 $cleansource .= fgets($pipes[1], 1024);
553 $return_value = proc_close($process);
556 wfProfileOut( $fname );
558 if( $cleansource == '' && $text != '') {
559 wfDebug( "Tidy error detected!\n" );
560 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
567 * parse the wiki syntax used to render tables
571 function doTableStuff ( $t ) {
572 $fname = 'Parser::doTableStuff';
573 wfProfileIn( $fname );
575 $t = explode ( "\n" , $t ) ;
576 $td = array () ; # Is currently a td tag open?
577 $ltd = array () ; # Was it TD or TH?
578 $tr = array () ; # Is currently a tr tag open?
579 $ltr = array () ; # tr attributes
580 $indent_level = 0; # indent level of the table
581 foreach ( $t AS $k => $x )
584 $fc = substr ( $x , 0 , 1 ) ;
585 if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
586 $indent_level = strlen( $matches[1] );
588 str_repeat( '<dl><dd>', $indent_level ) .
589 '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
590 array_push ( $td , false ) ;
591 array_push ( $ltd , '' ) ;
592 array_push ( $tr , false ) ;
593 array_push ( $ltr , '' ) ;
595 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
596 else if ( '|}' == substr ( $x , 0 , 2 ) ) {
598 $l = array_pop ( $ltd ) ;
599 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
600 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
602 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
604 else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
605 $x = substr ( $x , 1 ) ;
606 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
608 $l = array_pop ( $ltd ) ;
609 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
610 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
613 array_push ( $tr , false ) ;
614 array_push ( $td , false ) ;
615 array_push ( $ltd , '' ) ;
616 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
618 else if ( '|' == $fc ||
'!' == $fc ||
'|+' == substr ( $x , 0 , 2 ) ) { # Caption
620 if ( '|+' == substr ( $x , 0 , 2 ) ) {
622 $x = substr ( $x , 1 ) ;
624 $after = substr ( $x , 1 ) ;
625 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
626 $after = explode ( '||' , $after ) ;
629 # Loop through each table cell
630 foreach ( $after AS $theline )
635 $tra = array_pop ( $ltr ) ;
636 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
637 array_push ( $tr , true ) ;
638 array_push ( $ltr , '' ) ;
641 $l = array_pop ( $ltd ) ;
642 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
643 if ( $fc == '|' ) $l = 'td' ;
644 else if ( $fc == '!' ) $l = 'th' ;
645 else if ( $fc == '+' ) $l = 'caption' ;
647 array_push ( $ltd , $l ) ;
650 $y = explode ( '|' , $theline , 2 ) ;
651 # Note that a '|' inside an invalid link should not
652 # be mistaken as delimiting cell parameters
653 if ( strpos( $y[0], '[[' ) !== false ) {
654 $y = array ($theline);
656 if ( count ( $y ) == 1 )
657 $y = "{$z}<{$l}>{$y[0]}" ;
658 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
660 array_push ( $td , true ) ;
665 # Closing open td, tr && table
666 while ( count ( $td ) > 0 )
668 if ( array_pop ( $td ) ) $t[] = '</td>' ;
669 if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
673 $t = implode ( "\n" , $t ) ;
674 # $t = $this->removeHTMLtags( $t );
675 wfProfileOut( $fname );
680 * Helper function for parse() that transforms wiki markup into
681 * HTML. Only called for $mOutputType == OT_HTML.
685 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
688 $fname = 'Parser::internalParse';
689 wfProfileIn( $fname );
691 $text = $this->removeHTMLtags( $text );
692 $text = $this->replaceVariables( $text, $args );
694 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
696 $text = $this->doHeadings( $text );
697 if($this->mOptions
->getUseDynamicDates()) {
698 global $wgDateFormatter;
699 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
701 $text = $this->doAllQuotes( $text );
702 $text = $this->replaceInternalLinks( $text );
703 $text = $this->replaceExternalLinks( $text );
705 # replaceInternalLinks may sometimes leave behind
706 # absolute URLs, which have to be masked to hide them from replaceExternalLinks
707 $text = str_replace("http-noparse://","http://",$text);
709 $text = $this->doMagicLinks( $text );
710 $text = $this->doTableStuff( $text );
711 $text = $this->formatHeadings( $text, $isMain );
712 $sk =& $this->mOptions
->getSkin();
713 $text = $sk->transformContent( $text );
715 wfProfileOut( $fname );
720 * Replace special strings like "ISBN xxx" and "RFC xxx" with
721 * magic external links.
725 function &doMagicLinks( &$text ) {
726 global $wgUseGeoMode;
727 $text = $this->magicISBN( $text );
728 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
729 $text = $this->magicGEO( $text );
731 $text = $this->magicRFC( $text, 'RFC ', 'rfcurl' );
732 $text = $this->magicRFC( $text, 'PMID ', 'pubmedurl' );
737 * Parse ^^ tokens and return html
741 function doExponent( $text ) {
742 $fname = 'Parser::doExponent';
743 wfProfileIn( $fname );
744 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
745 wfProfileOut( $fname );
750 * Parse headers and return html
754 function doHeadings( $text ) {
755 $fname = 'Parser::doHeadings';
756 wfProfileIn( $fname );
757 for ( $i = 6; $i >= 1; --$i ) {
758 $h = substr( '======', 0, $i );
759 $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
760 "<h{$i}>\\1</h{$i}>\\2", $text );
762 wfProfileOut( $fname );
767 * Replace single quotes with HTML markup
769 * @return string the altered text
771 function doAllQuotes( $text ) {
772 $fname = 'Parser::doAllQuotes';
773 wfProfileIn( $fname );
775 $lines = explode( "\n", $text );
776 foreach ( $lines as $line ) {
777 $outtext .= $this->doQuotes ( $line ) . "\n";
779 $outtext = substr($outtext, 0,-1);
780 wfProfileOut( $fname );
785 * Helper function for doAllQuotes()
788 function doQuotes( $text ) {
789 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
790 if ( count( $arr ) == 1 )
794 # First, do some preliminary work. This may shift some apostrophes from
795 # being mark-up to being text. It also counts the number of occurrences
796 # of bold and italics mark-ups.
800 foreach ( $arr as $r )
802 if ( ( $i %
2 ) == 1 )
804 # If there are ever four apostrophes, assume the first is supposed to
805 # be text, and the remaining three constitute mark-up for bold text.
806 if ( strlen( $arr[$i] ) == 4 )
811 # If there are more than 5 apostrophes in a row, assume they're all
812 # text except for the last 5.
813 else if ( strlen( $arr[$i] ) > 5 )
815 $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
818 # Count the number of occurrences of bold and italics mark-ups.
819 # We are not counting sequences of five apostrophes.
820 if ( strlen( $arr[$i] ) == 2 ) $numitalics++
; else
821 if ( strlen( $arr[$i] ) == 3 ) $numbold++
; else
822 if ( strlen( $arr[$i] ) == 5 ) { $numitalics++
; $numbold++
; }
827 # If there is an odd number of both bold and italics, it is likely
828 # that one of the bold ones was meant to be an apostrophe followed
829 # by italics. Which one we cannot know for certain, but it is more
830 # likely to be one that has a single-letter word before it.
831 if ( ( $numbold %
2 == 1 ) && ( $numitalics %
2 == 1 ) )
834 $firstsingleletterword = -1;
835 $firstmultiletterword = -1;
837 foreach ( $arr as $r )
839 if ( ( $i %
2 == 1 ) and ( strlen( $r ) == 3 ) )
841 $x1 = substr ($arr[$i-1], -1);
842 $x2 = substr ($arr[$i-1], -2, 1);
844 if ($firstspace == -1) $firstspace = $i;
845 } else if ($x2 == ' ') {
846 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
848 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
854 # If there is a single-letter word, use it!
855 if ($firstsingleletterword > -1)
857 $arr [ $firstsingleletterword ] = "''";
858 $arr [ $firstsingleletterword-1 ] .= "'";
860 # If not, but there's a multi-letter word, use that one.
861 else if ($firstmultiletterword > -1)
863 $arr [ $firstmultiletterword ] = "''";
864 $arr [ $firstmultiletterword-1 ] .= "'";
866 # ... otherwise use the first one that has neither.
867 # (notice that it is possible for all three to be -1 if, for example,
868 # there is only one quintuple-apostrophe in the line)
869 else if ($firstspace > -1)
871 $arr [ $firstspace ] = "''";
872 $arr [ $firstspace-1 ] .= "'";
876 # Now let's actually convert our apostrophic mush to HTML!
885 if ($state == 'both')
892 if (strlen ($r) == 2)
895 { $output .= '</i>'; $state = ''; }
896 else if ($state == 'bi')
897 { $output .= '</i>'; $state = 'b'; }
898 else if ($state == 'ib')
899 { $output .= '</b></i><b>'; $state = 'b'; }
900 else if ($state == 'both')
901 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
902 else # $state can be 'b' or ''
903 { $output .= '<i>'; $state .= 'i'; }
905 else if (strlen ($r) == 3)
908 { $output .= '</b>'; $state = ''; }
909 else if ($state == 'bi')
910 { $output .= '</i></b><i>'; $state = 'i'; }
911 else if ($state == 'ib')
912 { $output .= '</b>'; $state = 'i'; }
913 else if ($state == 'both')
914 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
915 else # $state can be 'i' or ''
916 { $output .= '<b>'; $state .= 'b'; }
918 else if (strlen ($r) == 5)
921 { $output .= '</b><i>'; $state = 'i'; }
922 else if ($state == 'i')
923 { $output .= '</i><b>'; $state = 'b'; }
924 else if ($state == 'bi')
925 { $output .= '</i></b>'; $state = ''; }
926 else if ($state == 'ib')
927 { $output .= '</b></i>'; $state = ''; }
928 else if ($state == 'both')
929 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
930 else # ($state == '')
931 { $buffer = ''; $state = 'both'; }
936 # Now close all remaining tags. Notice that the order is important.
937 if ($state == 'b' ||
$state == 'ib')
939 if ($state == 'i' ||
$state == 'bi' ||
$state == 'ib')
943 if ($state == 'both')
944 $output .= '<b><i>'.$buffer.'</i></b>';
950 * Replace external links
952 * Note: this is all very hackish and the order of execution matters a lot.
953 * Make sure to run maintenance/parserTests.php if you change this code.
957 function replaceExternalLinks( $text ) {
958 $fname = 'Parser::replaceExternalLinks';
959 wfProfileIn( $fname );
961 $sk =& $this->mOptions
->getSkin();
963 $linktrail = $wgContLang->linkTrail();
965 $bits = preg_split( EXT_LINK_BRACKETED
, $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
967 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
970 while ( $i<count( $bits ) ) {
972 $protocol = $bits[$i++
];
974 $trail = $bits[$i++
];
976 # The characters '<' and '>' (which were escaped by
977 # removeHTMLtags()) should not be included in
978 # URLs, per RFC 2396.
979 if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE
)) {
980 $text = substr($url, $m2[0][1]) . ' ' . $text;
981 $url = substr($url, 0, $m2[0][1]);
984 # If the link text is an image URL, replace it with an <img> tag
985 # This happened by accident in the original parser, but some people used it extensively
986 $img = $this->maybeMakeImageLink( $text );
987 if ( $img !== false ) {
993 # No link text, e.g. [http://domain.tld/some.link]
995 # Autonumber if allowed
996 if ( strpos( HTTP_PROTOCOLS
, $protocol ) !== false ) {
997 $text = '[' . ++
$this->mAutonumber
. ']';
999 # Otherwise just use the URL
1000 $text = htmlspecialchars( $url );
1003 # Have link text, e.g. [http://domain.tld/some.link text]s
1005 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1011 $encUrl = htmlspecialchars( $url );
1012 # Bit in parentheses showing the URL for the printable version
1013 if( $url == $text ||
preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
1016 # Expand the URL for printable version
1017 if ( ! $sk->suppressUrlExpansion() ) {
1018 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1024 # Process the trail (i.e. everything after this link up until start of the next link),
1025 # replacing any non-bracketed links
1026 $trail = $this->replaceFreeExternalLinks( $trail );
1028 # Use the encoded URL
1029 # This means that users can paste URLs directly into the text
1030 # Funny characters like ö aren't valid in URLs anyway
1031 # This was changed in August 2004
1032 $s .= $sk->makeExternalLink( $url, $text, false ) . $dtrail. $paren . $trail;
1035 wfProfileOut( $fname );
1040 * Replace anything that looks like a URL with a link
1043 function replaceFreeExternalLinks( $text ) {
1044 $fname = 'Parser::replaceFreeExternalLinks';
1045 wfProfileIn( $fname );
1047 $bits = preg_split( '/((?:'.URL_PROTOCOLS
.'):)/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
1048 $s = array_shift( $bits );
1051 $sk =& $this->mOptions
->getSkin();
1053 while ( $i < count( $bits ) ){
1054 $protocol = $bits[$i++
];
1055 $remainder = $bits[$i++
];
1057 if ( preg_match( '/^('.EXT_LINK_URL_CLASS
.'+)(.*)$/s', $remainder, $m ) ) {
1058 # Found some characters after the protocol that look promising
1059 $url = $protocol . $m[1];
1062 # The characters '<' and '>' (which were escaped by
1063 # removeHTMLtags()) should not be included in
1064 # URLs, per RFC 2396.
1065 if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE
)) {
1066 $trail = substr($url, $m2[0][1]) . $trail;
1067 $url = substr($url, 0, $m2[0][1]);
1070 # Move trailing punctuation to $trail
1072 # If there is no left bracket, then consider right brackets fair game too
1073 if ( strpos( $url, '(' ) === false ) {
1077 $numSepChars = strspn( strrev( $url ), $sep );
1078 if ( $numSepChars ) {
1079 $trail = substr( $url, -$numSepChars ) . $trail;
1080 $url = substr( $url, 0, -$numSepChars );
1083 # Replace &amp; from obsolete syntax with &.
1084 # All HTML entities will be escaped by makeExternalLink()
1085 # or maybeMakeImageLink()
1086 $url = str_replace( '&', '&', $url );
1088 # Is this an external image?
1089 $text = $this->maybeMakeImageLink( $url );
1090 if ( $text === false ) {
1091 # Not an image, make a link
1092 $text = $sk->makeExternalLink( $url, $url );
1094 $s .= $text . $trail;
1096 $s .= $protocol . $remainder;
1104 * make an image if it's allowed
1107 function maybeMakeImageLink( $url ) {
1108 $sk =& $this->mOptions
->getSkin();
1110 if ( $this->mOptions
->getAllowExternalImages() ) {
1111 if ( preg_match( EXT_IMAGE_REGEX
, $url ) ) {
1113 $text = $sk->makeImage( htmlspecialchars( $url ) );
1120 * Process [[ ]] wikilinks
1125 function replaceInternalLinks( $s ) {
1126 global $wgLang, $wgContLang, $wgLinkCache;
1127 global $wgDisableLangConversion;
1128 static $fname = 'Parser::replaceInternalLinks' ;
1130 wfProfileIn( $fname );
1132 wfProfileIn( $fname.'-setup' );
1134 # the % is needed to support urlencoded titles as well
1135 if ( !$tc ) { $tc = Title
::legalChars() . '#%'; }
1137 $sk =& $this->mOptions
->getSkin();
1138 global $wgUseOldExistenceCheck;
1139 # "Post-parse link colour check" works only on wiki text since it's now
1140 # in Parser. Enable it, then disable it when we're done.
1141 $saveParseColour = $sk->postParseLinkColour( !$wgUseOldExistenceCheck );
1143 $redirect = MagicWord
::get ( MAG_REDIRECT
) ;
1145 #split the entire text string on occurrences of [[
1146 $a = explode( '[[', ' ' . $s );
1147 #get the first element (all text up to first [[), and remove the space we added
1148 $s = array_shift( $a );
1149 $s = substr( $s, 1 );
1151 # Match a link having the form [[namespace:link|alternate]]trail
1153 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1154 # Match cases where there is no "]]", which might still be images
1155 static $e1_img = FALSE;
1156 if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
1157 # Match the end of a line for a word that's not followed by whitespace,
1158 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1159 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1161 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
1163 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
1165 if ( $useLinkPrefixExtension ) {
1166 if ( preg_match( $e2, $s, $m ) ) {
1167 $first_prefix = $m[2];
1170 $first_prefix = false;
1176 $selflink = $this->mTitle
->getPrefixedText();
1177 wfProfileOut( $fname.'-setup' );
1179 $checkVariantLink = sizeof($wgContLang->getVariants())>1;
1180 $useSubpages = $this->areSubpagesAllowed();
1182 # Loop for each link
1183 for ($k = 0; isset( $a[$k] ); $k++
) {
1185 if ( $useLinkPrefixExtension ) {
1186 wfProfileIn( $fname.'-prefixhandling' );
1187 if ( preg_match( $e2, $s, $m ) ) {
1195 $prefix = $first_prefix;
1196 $first_prefix = false;
1198 wfProfileOut( $fname.'-prefixhandling' );
1201 $might_be_img = false;
1203 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1205 # fix up urlencoded title texts
1206 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1208 } elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
1209 $might_be_img = true;
1211 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1213 } else { # Invalid form; output directly
1214 $s .= $prefix . '[[' . $line ;
1218 # Don't allow internal links to pages containing
1219 # PROTO: where PROTO is a valid URL protocol; these
1220 # should be external links.
1221 if (preg_match('/^((?:'.URL_PROTOCOLS
.'):)/', $m[1])) {
1222 $s .= $prefix . '[[' . $line ;
1226 # Make subpage if necessary
1227 if( $useSubpages ) {
1228 $link = $this->maybeDoSubpageLink( $m[1], $text );
1233 $noforce = (substr($m[1], 0, 1) != ':');
1235 # Strip off leading ':'
1236 $link = substr($link, 1);
1239 $nt =& Title
::newFromText( $this->unstripNoWiki($link, $this->mStripState
) );
1241 $s .= $prefix . '[[' . $line;
1245 #check other language variants of the link
1246 #if the article does not exist
1247 if( $checkVariantLink
1248 && $nt->getArticleID() == 0 ) {
1249 $wgContLang->findVariantLink($link, $nt);
1252 $ns = $nt->getNamespace();
1253 $iw = $nt->getInterWiki();
1255 if ($might_be_img) { # if this is actually an invalid link
1256 if ($ns == NS_IMAGE
&& $noforce) { #but might be an image
1258 while (isset ($a[$k+
1]) ) {
1259 #look at the next 'line' to see if we can close it there
1260 $next_line = array_shift(array_splice( $a, $k +
1, 1) );
1261 if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {
1262 # the first ]] closes the inner link, the second the image
1264 $text .= '[[' . $m[1];
1267 } elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {
1268 #if there's exactly one ]] that's fine, we'll keep looking
1269 $text .= '[[' . $m[0];
1271 #if $next_line is invalid too, we need look no further
1272 $text .= '[[' . $next_line;
1277 # we couldn't find the end of this imageLink, so output it raw
1278 #but don't ignore what might be perfectly normal links in the text we've examined
1279 $text = $this->replaceInternalLinks($text);
1280 $s .= $prefix . '[[' . $link . '|' . $text;
1281 # note: no $trail, because without an end, there *is* no trail
1284 } else { #it's not an image, so output it raw
1285 $s .= $prefix . '[[' . $link . '|' . $text;
1286 # note: no $trail, because without an end, there *is* no trail
1291 $wasblank = ( '' == $text );
1292 if( $wasblank ) $text = $link;
1295 # Link not escaped by : , create the various objects
1299 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
1300 array_push( $this->mOutput
->mLanguageLinks
, $nt->getFullText() );
1301 $tmp = $prefix . $trail ;
1302 $s .= (trim($tmp) == '')?
'': $tmp;
1306 if ( $ns == NS_IMAGE
) {
1307 wfProfileIn( "$fname-image" );
1309 # recursively parse links inside the image caption
1310 # actually, this will parse them in any other parameters, too,
1311 # but it might be hard to fix that, and it doesn't matter ATM
1312 $text = $this->replaceExternalLinks($text);
1313 $text = $this->replaceInternalLinks($text);
1315 # replace the image with a link-holder so that replaceExternalLinks() can't mess with it
1316 $s .= $prefix . $this->insertStripItem( $sk->makeImageLinkObj( $nt, $text ), $this->mStripState
) . $trail;
1317 $wgLinkCache->addImageLinkObj( $nt );
1319 wfProfileOut( "$fname-image" );
1323 if ( $ns == NS_CATEGORY
) {
1324 wfProfileIn( "$fname-category" );
1325 $t = $nt->getText();
1327 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1328 $pPLC=$sk->postParseLinkColour();
1329 $sk->postParseLinkColour( false );
1330 $t = $sk->makeLinkObj( $nt, $t, '', '' , $prefix );
1331 $sk->postParseLinkColour( $pPLC );
1332 $wgLinkCache->resume();
1335 if ( $this->mTitle
->getNamespace() == NS_CATEGORY
) {
1336 $sortkey = $this->mTitle
->getText();
1338 $sortkey = $this->mTitle
->getPrefixedText();
1343 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1344 $this->mOutput
->addCategoryLink( $t );
1345 $s .= $prefix . $trail ;
1347 wfProfileOut( "$fname-category" );
1352 if( ( $nt->getPrefixedText() === $selflink ) &&
1353 ( $nt->getFragment() === '' ) ) {
1354 # Self-links are handled specially; generally de-link and change to bold.
1355 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1359 # Special and Media are pseudo-namespaces; no pages actually exist in them
1360 if( $ns == NS_MEDIA
) {
1361 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text, true ) . $trail;
1362 $wgLinkCache->addImageLinkObj( $nt );
1364 } elseif( $ns == NS_SPECIAL
) {
1365 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1368 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1370 $sk->postParseLinkColour( $saveParseColour );
1371 wfProfileOut( $fname );
1376 * Return true if subpage links should be expanded on this page.
function areSubpagesAllowed() {
    # Subpage expansion is switched on per namespace: it is allowed only when
    # $wgNamespacesWithSubpages holds a non-empty entry for the namespace of
    # the page currently being parsed.
    global $wgNamespacesWithSubpages;
    $currentNs = $this->mTitle->getNamespace();
    return !empty( $wgNamespacesWithSubpages[$currentNs] );
}
1386 * Handle link to subpage if necessary
1387 * @param string $target the source of the link
1388 * @param string &$text the link text, modified as necessary
1389 * @return string the full name of the link
# Resolves a subpage-style link target ("/Foo", "../Foo") against the title of
# the page being parsed. $ret starts as $target unchanged and is only rewritten
# when areSubpagesAllowed() returns true.
# NOTE(review): the embedded line numbers skip (e.g. 1413-1414, 1420, 1424,
# 1427, 1438-1440, 1444-1449), so this excerpt appears to have lost lines: the
# assignments that fill $text and $dotdotcount, the else branches, the closing
# braces and the final return are not visible here -- confirm against the full file.
1392 function maybeDoSubpageLink($target, &$text) {
1395 # :Foobar -- override special treatment of prefix (images, language links)
1396 # /Foobar -- convert to CurrentPage/Foobar
1397 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1398 # ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
1399 # ../Foobar -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
1401 $fname = 'Parser::maybeDoSubpageLink';
1402 wfProfileIn( $fname );
1403 $ret = $target; # default return value is no change
1405 # Some namespaces don't allow subpages,
1406 # so only perform processing if subpages are allowed
1407 if( $this->areSubpagesAllowed() ) {
1408 # Look at the first character
# NOTE(review): curly-brace string offsets such as $target{0} were removed in
# PHP 8.0; $target[0] is the drop-in equivalent.
1409 if( $target != '' && $target{0} == '/' ) {
1410 # / at end means we don't want the slash to be shown
1411 if( substr( $target, -1, 1 ) == '/' ) {
1412 $target = substr( $target, 1, -1 );
1415 $noslash = substr( $target, 1 );
# The resolved name is "<current prefixed title>/<target without its leading slash>".
1418 $ret = $this->mTitle
->getPrefixedText(). '/' . trim($noslash);
1419 if( '' === $text ) {
1421 } # this might be changed for ugliness reasons
1423 # check for .. subpage backlinks
1425 $nodotdot = $target;
# Strip each leading "../" from the working copy of the target.
1426 while( strncmp( $nodotdot, "../", 3 ) == 0 ) {
1428 $nodotdot = substr( $nodotdot, 3 );
1430 if($dotdotcount > 0) {
1431 $exploded = explode( '/', $this->mTitle
->GetPrefixedText() );
1432 if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
# Drop the last $dotdotcount path segments of the current title.
1433 $ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
1434 # / at the end means don't show full path
1435 if( substr( $nodotdot, -1, 1 ) == '/' ) {
1436 $nodotdot = substr( $nodotdot, 0, -1 );
1437 if( '' === $text ) {
1441 $nodotdot = trim( $nodotdot );
1442 if( $nodotdot != '' ) {
1443 $ret .= '/' . $nodotdot;
1450 wfProfileOut( $fname );
1455 * Used by doBlockLevels()
/* private */ function closeParagraph() {
    # Emits the closing tag for the block element recorded in mLastSection
    # (if any) and resets the paragraph/pre tracking state.
    # Returns the closing tag followed by a newline, or '' when nothing is open.
    $closing = '';
    if ( '' != $this->mLastSection ) {
        $closing = '</' . $this->mLastSection . ">\n";
    }

    # After this call no section is open and we are no longer inside <pre>.
    $this->mInPre = false;
    $this->mLastSection = '';

    return $closing;
}
1467 # getCommon() returns the length of the longest common substring
1468 # of both arguments, starting at the beginning of both.
/* private */ function getCommon( $st1, $st2 ) {
    # Returns the length of the longest common prefix of $st1 and $st2,
    # i.e. the index of the first position at which they differ (or the
    # length of the shorter string when one is a prefix of the other).
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    # Square-bracket offsets are used because the curly-brace form
    # ($st1{$i}) was removed in PHP 8.0.
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] != $st2[$i] ) {
            break;
        }
    }

    return $i;
}
1480 # These next three functions open, continue, and close the list
1481 # element appropriate to the prefix character passed into them.
/* private */ function openList( $char ) {
    # Opens the list element that corresponds to the wiki prefix character
    # $char and returns the HTML for it. Any open paragraph is closed first.
    $html = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $html .= '<ul><li>';
            break;
        case '#':
            $html .= '<ol><li>';
            break;
        case ':':
            $html .= '<dl><dd>';
            break;
        case ';':
            $html .= '<dl><dt>';
            # Remember that a <dt> is open so nextItem()/closeList() can close it.
            $this->mDTopen = true;
            break;
        default:
            # Unknown prefix: the marker replaces (does not extend) the output.
            $html = '<!-- ERR 1 -->';
    }

    return $html;
}
/* private */ function nextItem( $char ) {
    # Returns the HTML that ends the current list item and starts the next
    # one for the prefix character $char, updating mDTopen for definition lists.
    if ( '*' == $char || '#' == $char ) {
        return '</li><li>';
    }
    if ( ':' != $char && ';' != $char ) {
        return '<!-- ERR 2 -->';
    }

    # Close whichever definition-list element is currently open: <dt> when
    # mDTopen is set, <dd> otherwise.
    $close = $this->mDTopen ? '</dt>' : '</dd>';

    if ( ';' == $char ) {
        $this->mDTopen = true;
        return $close . '<dt>';
    }
    $this->mDTopen = false;
    return $close . '<dd>';
}
# Builds the closing HTML for the list element that matches the prefix
# character $char. For ':' the choice between </dt> and </dd> depends on
# mDTopen, which is cleared when a <dt> is closed. An unknown character
# returns the '<!-- ERR 3 -->' marker immediately.
# NOTE(review): the embedded numbering skips 1521, 1523-1524 and everything
# after 1525, so the else branch, closing braces and the statement that
# returns $text are not visible in this excerpt -- confirm against the full file.
1514 /* private */ function closeList( $char ) {
1515 if ( '*' == $char ) { $text = '</li></ul>'; }
1516 else if ( '#' == $char ) { $text = '</li></ol>'; }
1517 else if ( ':' == $char ) {
1518 if ( $this->mDTopen
) {
1519 $this->mDTopen
= false;
1520 $text = '</dt></dl>';
1522 $text = '</dd></dl>';
1525 else { return '<!-- ERR 3 -->'; }
1531 * Make lists from lines starting with ':', '*', '#', etc.
1534 * @return string the lists rendered as HTML
# Walks $text line by line and emits block-level HTML: nested lists for lines
# prefixed with * # : ;, <pre> for lines starting with a space, and <p>
# paragraphs otherwise. State is kept in $lastPrefix (list nesting of the
# previous line), $this->mLastSection ('p', 'pre' or ''), $this->mInPre,
# $inBlockElem and $paragraphStack (a pending paragraph tag or false).
# When $linestart is false the first line is copied to the output untouched.
# NOTE(review): the embedded line numbers skip throughout (e.g. 1562-1563,
# 1567, 1569, 1571-1574, 1586, 1588-1589, 1677-1679, 1682-1683, 1687-1688,
# 1690+), so else branches, closing braces, a few assignments and the final
# return are not visible in this excerpt -- confirm against the full file.
# NOTE(review): curly-brace string offsets ($lastPrefix{...}, $pref{...},
# $t{0}, $pref2{...}) were removed in PHP 8.0; square brackets are equivalent.
1536 function doBlockLevels( $text, $linestart ) {
1537 $fname = 'Parser::doBlockLevels';
1538 wfProfileIn( $fname );
1540 # Parsing through the text line by line. The main thing
1541 # happening here is handling of block-level elements p, pre,
1542 # and making lists from lines starting with * # : etc.
1544 $textLines = explode( "\n", $text );
1546 $lastPrefix = $output = $lastLine = '';
1547 $this->mDTopen
= $inBlockElem = false;
1549 $paragraphStack = false;
1551 if ( !$linestart ) {
1552 $output .= array_shift( $textLines );
1554 foreach ( $textLines as $oLine ) {
1555 $lastPrefixLength = strlen( $lastPrefix );
1556 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1557 $preOpenMatch = preg_match('/<pre/i', $oLine );
1558 if ( !$this->mInPre
) {
1559 # Multiple prefixes may abut each other for nested lists.
1560 $prefixLength = strspn( $oLine, '*#:;' );
1561 $pref = substr( $oLine, 0, $prefixLength );
# $pref2 treats ';' like ':' so that term and definition lines compare as
# the same list level against $lastPrefix.
1564 $pref2 = str_replace( ';', ':', $pref );
1565 $t = substr( $oLine, $prefixLength );
1566 $this->mInPre
= !empty($preOpenMatch);
1568 # Don't interpret any other prefixes in preformatted text
1570 $pref = $pref2 = '';
1575 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1576 # Same as the last item, so no need to deal with nesting or opening stuff
1577 $output .= $this->nextItem( substr( $pref, -1 ) );
1578 $paragraphStack = false;
1580 if ( substr( $pref, -1 ) == ';') {
1581 # The one nasty exception: definition lists work like this:
1582 # ; title : definition text
1583 # So we check for : in the remainder text to split up the
1584 # title and definition, without b0rking links.
1585 if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1587 $output .= $term . $this->nextItem( ':' );
1590 } elseif( $prefixLength ||
$lastPrefixLength ) {
1591 # Either open or close a level...
1592 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1593 $paragraphStack = false;
# Close every list level of the previous line that this line does not share.
1595 while( $commonPrefixLength < $lastPrefixLength ) {
1596 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1597 --$lastPrefixLength;
1599 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1600 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open one new list level per prefix character beyond the shared part.
1602 while ( $prefixLength > $commonPrefixLength ) {
1603 $char = substr( $pref, $commonPrefixLength, 1 );
1604 $output .= $this->openList( $char );
1606 if ( ';' == $char ) {
1607 # FIXME: This is dupe of code above
1608 if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1610 $output .= $term . $this->nextItem( ':' );
1613 ++
$commonPrefixLength;
1615 $lastPrefix = $pref2;
1617 if( 0 == $prefixLength ) {
1618 wfProfileIn( "$fname-paragraph" );
1619 # No prefix (not in list)--go to paragraph mode
1620 $uniq_prefix = UNIQ_PREFIX
;
1621 // XXX: use a stack for nestable elements like span, table and div
1622 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
1623 $closematch = preg_match(
1624 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1625 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/iS', $t );
1626 if ( $openmatch or $closematch ) {
1627 $paragraphStack = false;
1628 $output .= $this->closeParagraph();
1629 if($preOpenMatch and !$preCloseMatch) {
1630 $this->mInPre
= true;
1632 if ( $closematch ) {
1633 $inBlockElem = false;
1635 $inBlockElem = true;
1637 } else if ( !$inBlockElem && !$this->mInPre
) {
# A leading space starts or continues a <pre> section; a whitespace-only line
# only counts when a <pre> section is already open.
1638 if ( ' ' == $t{0} and ( $this->mLastSection
== 'pre' or trim($t) != '' ) ) {
1640 if ($this->mLastSection
!= 'pre') {
1641 $paragraphStack = false;
1642 $output .= $this->closeParagraph().'<pre>';
1643 $this->mLastSection
= 'pre';
1645 $t = substr( $t, 1 );
1648 if ( '' == trim($t) ) {
1649 if ( $paragraphStack ) {
1650 $output .= $paragraphStack.'<br />';
1651 $paragraphStack = false;
1652 $this->mLastSection
= 'p';
1654 if ($this->mLastSection
!= 'p' ) {
1655 $output .= $this->closeParagraph();
1656 $this->mLastSection
= '';
1657 $paragraphStack = '<p>';
1659 $paragraphStack = '</p><p>';
1663 if ( $paragraphStack ) {
1664 $output .= $paragraphStack;
1665 $paragraphStack = false;
1666 $this->mLastSection
= 'p';
1667 } else if ($this->mLastSection
!= 'p') {
1668 $output .= $this->closeParagraph().'<p>';
1669 $this->mLastSection
= 'p';
1674 wfProfileOut( "$fname-paragraph" );
1676 if ($paragraphStack === false) {
# After the last line: close any list levels that are still open.
1680 while ( $prefixLength ) {
1681 $output .= $this->closeList( $pref2{$prefixLength-1} );
1684 if ( '' != $this->mLastSection
) {
1685 $output .= '</' . $this->mLastSection
. '>';
1686 $this->mLastSection
= '';
1689 wfProfileOut( $fname );
1694 * Split up a string on ':', ignoring any occurrences inside
1695 * <a>..</a> or <span>...</span>
1696 * @param string $str the string to split
1697 * @param string &$before set to everything before the ':'
1698 * @param string &$after set to everything after the ':'
1699 * @return int|false the position of the ':', or false if none found
function findColonNoLinks($str, &$before, &$after) {
    # Finds the first ':' in $str that is not preceded by an unclosed <a or
    # <span tag. On success $before and $after receive the text on either
    # side of that ':' and its offset is returned; false is returned when no
    # acceptable ':' exists.
    #
    # $before/$after are overwritten for every candidate ':' examined, so
    # they are only meaningful when the return value is not false.
    #
    # I wonder if we should make this count all tags, not just <a>
    # and <span>. That would prevent us from matching a ':' that
    # comes in the middle of italics other such formatting....
    $fname = 'Parser::findColonNoLinks';
    wfProfileIn( $fname );

    $pos = 0;
    while ( true ) {
        $colon = strpos($str, ':', $pos);
        if ($colon === false) {
            # No further candidates.
            break;
        }

        $before = substr($str, 0, $colon);
        $after = substr($str, $colon + 1);

        # Skip any ':' within <a> or <span> pairs: compare the number of
        # opening and closing tags seen before the candidate.
        $a = substr_count($before, '<a');
        $s = substr_count($before, '<span');
        $ca = substr_count($before, '</a>');
        $cs = substr_count($before, '</span>');

        if ($a <= $ca and $s <= $cs) {
            # Tags are balanced before ':'; ok
            break;
        }

        # Candidate was inside a tag pair; continue searching after it.
        $pos = $colon + 1;
    }

    wfProfileOut( $fname );
    return $colon;
}
1734 * Return value of a magic variable (like PAGENAME)
# Returns the replacement text for the magic variable identified by $index.
# Values computed from the date, time or article count are memoised in the
# function-static $varCache, so they are computed once per request; the
# title-derived values below are returned without being cached.
# NOTE(review): the embedded numbering skips 1747-1748, 1757, 1759, 1761 and
# 1772-1781, so the switch header, the case labels for the three
# $this->mTitle returns, the default branch and the closing braces are not
# visible in this excerpt -- confirm against the full file.
1738 function getVariableValue( $index ) {
1739 global $wgContLang, $wgSitename, $wgServer;
1742 * Some of these require message or data lookups and can be
1743 * expensive to check many times.
1745 static $varCache = array();
1746 if( isset( $varCache[$index] ) ) return $varCache[$index];
1749 case MAG_CURRENTMONTH
:
1750 return $varCache[$index] = $wgContLang->formatNum( date( 'm' ) );
1751 case MAG_CURRENTMONTHNAME
:
1752 return $varCache[$index] = $wgContLang->getMonthName( date('n') );
1753 case MAG_CURRENTMONTHNAMEGEN
:
1754 return $varCache[$index] = $wgContLang->getMonthNameGen( date('n') );
1755 case MAG_CURRENTDAY
:
1756 return $varCache[$index] = $wgContLang->formatNum( date('j') );
1758 return $this->mTitle
->getText();
1760 return $this->mTitle
->getPartialURL();
1762 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1763 return $wgContLang->getNsText($this->mTitle
->getNamespace()); # Patch by Dori
1764 case MAG_CURRENTDAYNAME
:
# date('w') is 0-based (0 = Sunday); the +1 shifts it to a 1-based index.
1765 return $varCache[$index] = $wgContLang->getWeekdayName( date('w')+
1 );
1766 case MAG_CURRENTYEAR
:
1767 return $varCache[$index] = $wgContLang->formatNum( date( 'Y' ) );
1768 case MAG_CURRENTTIME
:
1769 return $varCache[$index] = $wgContLang->time( wfTimestampNow(), false );
1770 case MAG_NUMBEROFARTICLES
:
1771 return $varCache[$index] = $wgContLang->formatNum( wfNumberOfArticles() );
1782 * initialise the magic variables (like CURRENTMONTHNAME)
function initialiseVariables() {
    # Rebuilds $this->mVariables from scratch: for every id listed in
    # $wgVariableIDs the matching MagicWord registers itself in the array
    # together with the value getVariableValue() reports for that id.
    global $wgVariableIDs;
    $fname = 'Parser::initialiseVariables';
    wfProfileIn( $fname );

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
    }

    wfProfileOut( $fname );
}
1799 * Replace magic variables, templates, and template arguments
1800 * with the appropriate text. Templates are substituted recursively,
1801 * taking care to avoid infinite loops.
1803 * Note that the substitution depends on value of $mOutputType:
1804 * OT_WIKI: only {{subst:}} templates
1805 * OT_MSG: only magic variables
1806 * OT_HTML: all templates and magic variables
1808 * @param string $text The text to transform
1809 * @param array $args Key-value pairs representing template parameters to substitute
# Runs the three substitution passes over $text via preg_replace_callback:
# {{VARIABLE}} (always), then -- only for OT_HTML and OT_WIKI output --
# {{{argument}}} and {{template|...}}. $args is pushed on $this->mArgStack for
# the duration of the call so that argSubstitution() can read the arguments
# of the innermost template.
# NOTE(review): the embedded numbering skips 1817-1819, 1838 and 1842+, so
# the body of the MAX_INCLUDE_SIZE guard, a closing brace and the final
# return are not visible in this excerpt -- confirm against the full file.
1812 function replaceVariables( $text, $args = array() ) {
1813 global $wgLang, $wgScript, $wgArticlePath;
1815 # Prevent too big inclusions
1816 if( strlen( $text ) > MAX_INCLUDE_SIZE
) {
1820 $fname = 'Parser::replaceVariables';
1821 wfProfileIn( $fname );
1823 $titleChars = Title
::legalChars();
1825 # This function is called recursively. To keep track of arguments we need a stack:
1826 array_push( $this->mArgStack
, $args );
1828 # Variable substitution
1829 $text = preg_replace_callback( "/{{([$titleChars]*?)}}/", array( &$this, 'variableSubstitution' ), $text );
1831 if ( $this->mOutputType
== OT_HTML ||
$this->mOutputType
== OT_WIKI
) {
1832 # Argument substitution
1833 $text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", array( &$this, 'argSubstitution' ), $text );
1835 # Template substitution
# Capture groups: 1 = optional preceding newline or '{', 2 = template name,
# 3 = the argument string including its leading '|' (or empty).
1836 $regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
1837 $text = preg_replace_callback( $regex, array( &$this, 'braceSubstitution' ), $text );
1839 array_pop( $this->mArgStack
);
1841 wfProfileOut( $fname );
1846 * Replace magic variables
# preg_replace_callback handler for {{NAME}}: when $matches[1] is a key of
# $this->mVariables the stored value replaces the match and the output is
# flagged via mContainsOldMagic; otherwise the original text ($matches[0]) is
# kept. $this->mVariables is built on first use.
# NOTE(review): the embedded numbering skips 1854-1855, 1860, 1864, 1868,
# 1870 and 1872+, so the initialisation and assignment of $skip, the else
# keyword, closing braces and the final return are not visible in this
# excerpt -- confirm against the full file.
1849 function variableSubstitution( $matches ) {
1850 $fname = 'parser::variableSubstitution';
1851 wfProfileIn( $fname );
1852 if ( !$this->mVariables
) {
1853 $this->initialiseVariables();
1856 if ( $this->mOutputType
== OT_WIKI
) {
1857 # Do only magic variables prefixed by SUBST
1858 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1859 if (!$mwSubst->matchStartAndRemove( $matches[1] ))
1861 # Note that if we don't substitute the variable below,
1862 # we don't remove the {{subst:}} magic word, in case
1863 # it is a template rather than a magic variable.
1865 if ( !$skip && array_key_exists( $matches[1], $this->mVariables
) ) {
1866 $text = $this->mVariables
[$matches[1]];
1867 $this->mOutput
->mContainsOldMagic
= true;
1869 $text = $matches[0];
1871 wfProfileOut( $fname );
1875 # Split template arguments
function getTemplateArgs( $argsString ) {
    # Splits the raw argument string of a template call (including its
    # leading '|') into a list of arguments. Returns an empty array when
    # there are no arguments.
    if ( $argsString === '' ) {
        return array();
    }

    $pieces = explode( '|', substr( $argsString, 1 ) );

    # If any of the arguments contains a '[[' but no ']]', it needs to be
    # merged with the next arg because the '|' character between belongs
    # to the link syntax and not the template parameter syntax.
    $args = array();
    foreach ( $pieces as $piece ) {
        $last = count( $args ) - 1;
        if ( $last >= 0
            && substr_count( $args[$last], '[[' ) != substr_count( $args[$last], ']]' ) ) {
            # Previous argument still has an unbalanced link: glue this
            # piece onto it and re-evaluate on the next iteration.
            $args[$last] .= '|' . $piece;
        } else {
            $args[] = $piece;
        }
    }

    return $args;
}
1901 * Return the text of a template, after recursively
1902 * replacing any variables or templates within the template.
1904 * @param array $matches The parts of the template
1905 * $matches[1]: the title, i.e. the part before the |
1906 * $matches[2]: the parameters (including a leading |), if any
1907 * @return string the text of the template
# preg_replace_callback handler for {{...}}. $matches comes from the regex
# built in replaceVariables(): [1] is an optional preceding newline or '{',
# [2] the part before the first '|', [3] the argument string. The visible
# code tries, in order: subst: handling, msgnw:/msg:/int:, ns:, localurl:/
# localurle:, grammar:, the per-parse template cache ($this->mTemplates), and
# finally loading the page text through Article. Found template text is then
# stripped, cleaned and recursively expanded with the parsed arguments.
# NOTE(review): the embedded line numbers skip throughout this function, so
# the initialisation of $found, $nowiki, $noparse, $func, $index, $hl and
# $nsec, most else branches, closing braces and the return statements are not
# visible in this excerpt -- confirm against the full file before editing.
1910 function braceSubstitution( $matches ) {
1911 global $wgLinkCache, $wgContLang;
1912 $fname = 'Parser::braceSubstitution';
1913 wfProfileIn( $fname );
1921 # Need to know if the template comes at the start of a line,
1922 # to treat the beginning of the template like the beginning
1923 # of a line for tables and block-level elements.
1924 $linestart = $matches[1];
1926 # $part1 is the bit before the first |, and must contain only title characters
1927 # $args is a list of arguments, starting from index 0, not including $part1
1929 $part1 = $matches[2];
1930 # If the third subpattern matched anything, it will start with |
1932 $args = $this->getTemplateArgs($matches[3]);
1933 $argc = count( $args );
1935 # Don't parse {{{}}} because that's only for template arguments
1936 if ( $linestart === '{' ) {
1937 $text = $matches[0];
1944 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1945 if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType
== OT_WIKI
) ) {
1946 # One of two possibilities is true:
1947 # 1) Found SUBST but not in the PST phase
1948 # 2) Didn't find SUBST and in the PST phase
1949 # In either case, return without further processing
1950 $text = $matches[0];
1956 # MSG, MSGNW and INT
1959 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1960 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1963 # Remove obsolete MSG:
1964 $mwMsg =& MagicWord
::get( MAG_MSG
);
1965 $mwMsg->matchStartAndRemove( $part1 );
1968 # Check if it is an internal message
1969 $mwInt =& MagicWord
::get( MAG_INT
);
1970 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1971 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1972 $text = $linestart . wfMsgReal( $part1, $args, true );
1980 # Check for NS: (namespace expansion)
1981 $mwNs = MagicWord
::get( MAG_NS
);
1982 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero numeric argument is used as a namespace index; anything else is
# looked up as a canonical namespace name.
1983 if ( intval( $part1 ) ) {
1984 $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
1987 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1988 if ( !is_null( $index ) ) {
1989 $text = $linestart . $wgContLang->getNsText( $index );
1996 # LOCALURL and LOCALURLE
1998 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1999 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
2001 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
2002 $func = 'getLocalURL';
2003 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
2004 $func = 'escapeLocalURL';
2009 if ( $func !== '' ) {
2010 $title = Title
::newFromText( $part1 );
2011 if ( !is_null( $title ) ) {
# $func names the Title method to call; the first template argument, when
# present, is passed through to it.
2013 $text = $linestart . $title->$func( $args[0] );
2015 $text = $linestart . $title->$func();
2023 if ( !$found && $argc == 1 ) {
2024 $mwGrammar =& MagicWord
::get( MAG_GRAMMAR
);
2025 if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
2026 $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
2031 # Template table test
2033 # Did we encounter this template already? If yes, it is in the cache
2034 # and we need to check for loops.
2035 if ( !$found && isset( $this->mTemplates
[$part1] ) ) {
2036 # set $text to cached message.
2037 $text = $linestart . $this->mTemplates
[$part1];
2040 # Infinite loop test
2041 if ( isset( $this->mTemplatePath
[$part1] ) ) {
2044 $text .= '<!-- WARNING: template loop detected -->';
2048 # Load from database
2049 $itcamefromthedatabase = false;
# NOTE(review): maybeDoSubpageLink() takes its second parameter by reference,
# but `$subpage=''` is an expression rather than a variable, so writes made by
# the callee may not reach $subpage and the `$subpage !== ''` test below may
# never be true -- verify, and consider assigning $subpage on its own line.
2052 $part1 = $this->maybeDoSubpageLink( $part1, $subpage='' );
2053 if ($subpage !== '') {
2054 $ns = $this->mTitle
->getNamespace();
2056 $title = Title
::newFromText( $part1, $ns );
2057 if ( !is_null( $title ) && !$title->isExternal() ) {
2058 # Check for excessive inclusion
2059 $dbk = $title->getPrefixedDBkey();
2060 if ( $this->incrementIncludeCount( $dbk ) ) {
2061 # This should never be reached.
2062 $article = new Article( $title );
2063 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
2064 if ( $articleContent !== false ) {
2066 $text = $linestart . $articleContent;
2067 $itcamefromthedatabase = true;
2071 # If the title is valid but undisplayable, make a link to it
2072 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
2073 $text = $linestart . '[['.$title->getPrefixedText().']]';
2077 # Template cache array insertion
2078 $this->mTemplates
[$part1] = $text;
2082 # Recursive parsing, escaping and link table handling
2083 # Only for HTML output
2084 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
2085 $text = wfEscapeWikiText( $text );
2086 } elseif ( ($this->mOutputType
== OT_HTML ||
$this->mOutputType
== OT_WIKI
) && $found && !$noparse) {
2087 # Clean up argument array
2088 $assocArgs = array();
# Arguments without '=' become positional entries keyed by $index; arguments
# of the form name=value become named entries (both sides trimmed).
2090 foreach( $args as $arg ) {
2091 $eqpos = strpos( $arg, '=' );
2092 if ( $eqpos === false ) {
2093 $assocArgs[$index++
] = $arg;
2095 $name = trim( substr( $arg, 0, $eqpos ) );
2096 $value = trim( substr( $arg, $eqpos+
1 ) );
2097 if ( $value === false ) {
2100 if ( $name !== false ) {
2101 $assocArgs[$name] = $value;
2106 # Add a new element to the templace recursion path
2107 $this->mTemplatePath
[$part1] = 1;
2109 $text = $this->strip( $text, $this->mStripState
);
2110 $text = $this->removeHTMLtags( $text );
2111 $text = $this->replaceVariables( $text, $assocArgs );
2113 # Resume the link cache and register the inclusion as a link
2114 if ( $this->mOutputType
== OT_HTML
&& !is_null( $title ) ) {
2115 $wgLinkCache->addLinkObj( $title );
2118 # If the template begins with a table or block-level
2119 # element, it should be treated as beginning a new line.
# NOTE(review): '\n' in single quotes is the two characters backslash and n,
# not a newline, so this comparison is true even when $linestart holds the
# newline captured by the regex -- "\n" was presumably intended; confirm.
2120 if ($linestart !== '\n' && preg_match('/^({\\||:|;|#|\*)/', $text)) {
2121 $text = "\n" . $text;
2125 # Empties the template path
2126 $this->mTemplatePath
= array();
2128 wfProfileOut( $fname );
2131 # replace ==section headers==
2132 # XXX this needs to go away once we have a better parser.
2133 if ( $this->mOutputType
!= OT_WIKI
&& $itcamefromthedatabase ) {
2134 if( !is_null( $title ) )
2135 $encodedname = base64_encode($title->getPrefixedDBkey());
2137 $encodedname = base64_encode("");
# Split on heading lines; with PREG_SPLIT_DELIM_CAPTURE the headings land at
# the odd indexes of $m and the text between them at the even indexes.
2138 $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
2139 PREG_SPLIT_DELIM_CAPTURE
);
2142 for( $i = 0; $i < count($m); $i +
= 2 ) {
2144 if (!isset($m[$i +
1]) ||
$m[$i +
1] == "") continue;
2146 if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
2150 preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
# Tag the heading with the base64-encoded template name and section number.
2151 $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
2152 . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
2159 # Empties the template path
2160 $this->mTemplatePath
= array();
2163 wfProfileOut( $fname );
2166 wfProfileOut( $fname );
2172 * Triple brace replacement -- used for template arguments
function argSubstitution( $matches ) {
    # preg_replace_callback handler for {{{name}}}: looks the trimmed name up
    # in the argument set on top of $this->mArgStack (the innermost template
    # being expanded). Returns the argument value when it exists, otherwise
    # the original matched text so the braces are left untouched.
    $inputArgs = end( $this->mArgStack );
    $arg = trim( $matches[1] );

    if ( array_key_exists( $arg, $inputArgs ) ) {
        return $inputArgs[$arg];
    }

    return $matches[0];
}
2188 * Returns true if the function is allowed to include this entity
function incrementIncludeCount( $dbk ) {
    # Records one more inclusion of the entity keyed by $dbk and reports
    # whether the running total is still within MAX_INCLUDE_REPEAT.
    # Returns true when the inclusion is allowed, false once the limit is
    # exceeded. The counter is incremented in both cases.
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }

    $this->mIncludeCount[$dbk]++;

    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
2204 * Cleans up HTML, removes dangerous tags and attributes, and
2205 * removes HTML comments
# Filters HTML in $text against whitelists of element names. The text is
# split on '<'; each piece is matched as "optional slash, tag name,
# attributes, closing bracket, trailing text". Whitelisted tags have their
# attributes passed through fixTagAttributes(); a stack of open tags
# ($tagstack, with $tablestack saving the outer stack while inside a table)
# tracks nesting. HTML comments are removed first via removeHTMLcomments().
# The second foreach (after "this might be possible using tidy itself")
# performs the whitelist/attribute filtering without the nesting checks.
# NOTE(review): the embedded line numbers skip throughout (e.g. 2212-2213,
# 2220, 2223, 2227, 2229-2231, 2253, 2256-2257, 2259-2261, 2265-2266,
# 2269-2272, 2276, 2279, 2289-2291, 2294-2296, 2301, 2303-2304, 2308, 2320+),
# so the branch on $wgUserHtml/$wgUseTidy, the contents of $tabletags, the
# second argument lines of preg_match, the $badtag handling, closing braces
# and the final return are not visible in this excerpt -- confirm against the
# full file.
# NOTE(review): every `str_replace( '>', '>', ... )` below replaces '>' with
# itself, i.e. is a no-op as written; presumably the replacement was the
# entity &gt; before this text was extracted -- confirm against the full file.
2208 function removeHTMLtags( $text ) {
2209 global $wgUseTidy, $wgUserHtml;
2210 $fname = 'Parser::removeHTMLtags';
2211 wfProfileIn( $fname );
2214 $htmlpairs = array( # Tags that must be closed
2215 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
2216 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
2217 'strike', 'strong', 'tt', 'var', 'div', 'center',
2218 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
2219 'ruby', 'rt' , 'rb' , 'rp', 'p'
2221 $htmlsingle = array(
2222 'br', 'hr', 'li', 'dt', 'dd'
2224 $htmlnest = array( # Tags that can be nested--??
2225 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2226 'dl', 'font', 'big', 'small', 'sub', 'sup'
2228 $tabletags = array( # Can only appear inside table
2232 $htmlpairs = array();
2233 $htmlsingle = array();
2234 $htmlnest = array();
2235 $tabletags = array();
# Table-only tags are treated like single tags (no closing tag required), and
# $htmlelements is the complete whitelist.
2238 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2239 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2241 $htmlattrs = $this->getHTMLattrs () ;
2243 # Remove HTML comments
2244 $text = $this->removeHTMLcomments( $text );
2246 $bits = explode( '<', $text );
2247 $text = array_shift( $bits );
2249 $tagstack = array(); $tablestack = array();
2250 foreach ( $bits as $x ) {
# Notices and warnings are silenced while a possibly non-matching piece is
# destructured; the previous reporting level is restored right after.
2251 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
2252 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2254 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2255 error_reporting( $prev );
2258 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2262 if ( ! in_array( $t, $htmlsingle ) &&
2263 ( $ot = @array_pop
( $tagstack ) ) != $t ) {
2264 @array_push
( $tagstack, $ot );
2267 if ( $t == 'table' ) {
2268 $tagstack = array_pop( $tablestack );
2273 # Keep track for later
2274 if ( in_array( $t, $tabletags ) &&
2275 ! in_array( 'table', $tagstack ) ) {
2277 } else if ( in_array( $t, $tagstack ) &&
2278 ! in_array ( $t , $htmlnest ) ) {
2280 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table starts a fresh tag stack; the outer one is saved.
2281 if ( $t == 'table' ) {
2282 array_push( $tablestack, $tagstack );
2283 $tagstack = array();
2285 array_push( $tagstack, $t );
2287 # Strip non-approved attributes from the tag
2288 $newparams = $this->fixTagAttributes($params);
2292 $rest = str_replace( '>', '>', $rest );
2293 $text .= "<$slash$t $newparams$brace$rest";
2297 $text .= '<' . str_replace( '>', '>', $x);
2299 # Close off any remaining tags
2300 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2302 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2305 # this might be possible using tidy itself
2306 foreach ( $bits as $x ) {
2307 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2309 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2310 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2311 $newparams = $this->fixTagAttributes($params);
2312 $rest = str_replace( '>', '>', $rest );
2313 $text .= "<$slash$t $newparams$brace$rest";
2315 $text .= '<' . str_replace( '>', '>', $x);
2319 wfProfileOut( $fname );
2324 * Remove '<!--', '-->', and everything between.
2325 * To avoid leaving blank lines, when a comment is both preceded
2326 * and followed by a newline (ignoring spaces), trim leading and
2327 * trailing spaces and one of the newlines.
function removeHTMLcomments( $text ) {
    # Removes every terminated HTML comment from $text and returns the
    # result. When a comment sits alone on its line (only spaces between it
    # and the surrounding newlines) the whole line is collapsed into a single
    # newline so no blank line is left behind. An unterminated comment stops
    # processing and is left in place.
    $fname='Parser::removeHTMLcomments';
    wfProfileIn( $fname );
    while (($start = strpos($text, '<!--')) !== false) {
        $end = strpos($text, '-->', $start + 4);
        if ($end === false) {
            # Unterminated comment; bail out
            break;
        }

        # Move $end just past the closing marker so that $end - $start is
        # the full length of the comment.
        $end += 3;

        # Trim space and newline if the comment is both
        # preceded and followed by a newline
        $spaceStart = max($start - 1, 0);
        $spaceLen = $end - $spaceStart;
        # Extend the span backwards over spaces before the comment...
        while (substr($text, $spaceStart, 1) === ' ' && $spaceStart > 0) {
            $spaceStart--;
            $spaceLen++;
        }
        # ...and forwards over spaces after it.
        while (substr($text, $spaceStart + $spaceLen, 1) === ' ') {
            $spaceLen++;
        }
        if (substr($text, $spaceStart, 1) === "\n" and substr($text, $spaceStart + $spaceLen, 1) === "\n") {
            # Remove the comment, leading and trailing
            # spaces, and leave only one newline.
            $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
        } else {
            # Remove just the comment.
            $text = substr_replace($text, '', $start, $end - $start);
        }
    }
    wfProfileOut( $fname );
    return $text;
}
2368 * This function accomplishes several tasks:
2369 * 1) Auto-number headings if that option is enabled
2370 * 2) Add an [edit] link to sections for logged in users who have enabled the option
2371 * 3) Add a Table of contents on the top for users who have enabled the option
2372 * 4) Auto-anchor headings
2374 * It loops through all headlines, collects the necessary data, then splits up the
2375 * string and re-inserts the newly formatted headlines.
/**
 * Rebuild the <h1>..<h6> headings of rendered HTML: number them if
 * requested, attach [edit] section links, give each heading an anchor and
 * assemble the table of contents.
 *
 * All headings are collected first; the text is then split on the headings
 * and re-joined with the newly formatted ones.
 *
 * @param string $text   rendered HTML containing the headings
 * @param bool   $isMain if true, the TOC is appended after the first text
 *                       block (unless __TOC__ fixes its position)
 * @return string the HTML with rebuilt headings and, if enabled, the TOC
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		# Headings that came from a template carry a marker comment holding
		# the base64-encoded template title and section index
		if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
			$istemplate = 1;
			$templatetitle = base64_decode($mat[1]);
			$templatesection = 1 + (int)base64_decode($mat[2]);
			$headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels up to this one with dots
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Chain on the result of unstrip(); passing $headline again would
		# throw away the expansion done on the line above.
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		$canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
			"\$wgLinkHolders['texts'][\$1]",
			$canonized_headline );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			if( $istemplate ) {
				$head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
			} else {
				$head[$headlineCount] .= $sk->editSectionLink($this->mTitle, $sectionCount+1);
			}
		}

		# Add the edit section span
		if( $rightClickHack ) {
			if( $istemplate ) {
				$headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
			} else {
				$headline = $sk->editSectionScript($this->mTitle, $sectionCount+1,$headline);
			}
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

		$headlineCount++;
		if( !$istemplate ) {
			$sectionCount++;
		}
	}

	if( $doShowToc ) {
		$toclines = $headlineCount;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# __TOC__ is still in the text; swap it for the generated table
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2610 * Return an HTML link for the "ISBN 123456" text
/**
 * Turn every "ISBN 123456" occurrence in the text into a link to
 * Special:Booksources.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN references linked
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The keyword is a literal, so explode() is enough (no regex needed).
	# The leading space keeps the first chunk non-empty; it is cut off below.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Peel off leading blanks. strspn() copes with an empty or fully
		# consumed string, unlike indexing $x{0} in a loop.
		$len = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $len );
		$x = (string)substr( $x, $len );
		if ( $x == '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Longest run of characters allowed in an ISBN
		$len = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		$num = str_replace( '-', '', $isbn );
		$num = str_replace( ' ', '', $num );

		if ( '' == $num ) {
			# Nothing usable after the keyword: put the text back untouched
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2658 * Return an HTML link for the "GEO ..." text
/**
 * Turn every "GEO lat:long" occurrence in the text into a link to
 * Special:Geo. Coordinates written as degrees/minutes/seconds with a
 * compass direction are first rewritten into signed "GEO" notation.
 *
 * @param string $text text to be processed
 * @return string the text with GEO references linked
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# These rewrites are only for the ~35000 U.S. Census Rambot pages...
	# North/East become positive, South/West negative. (South used to be
	# emitted with a "+" latitude, which placed it in the northern hemisphere.)
	$directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO -\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO -\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;

	# Literal keyword, so explode() instead of the regex-based split().
	# The leading space keeps the first chunk non-empty; it is cut off below.
	$a = explode( 'GEO ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Peel off leading blanks, then the longest run of coordinate
		# characters. strspn() is safe on an empty or exhausted string.
		$len = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $len );
		$x = (string)substr( $x, $len );

		$len = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		$num = str_replace( '+', '', $geo );
		$num = str_replace( ' ', '', $num );

		if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			# Not a "lat:long" pair: put the text back untouched
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2709 * Return an HTML link for the "RFC 1234" text
2711 * @param string $text text to be processed
/**
 * Turn "<keyword><number>" references (by default "RFC 1234") into external
 * links. References directly preceded by "[[" are left alone so that they
 * stay part of the wiki link.
 *
 * @param string $text    text to be processed
 * @param string $keyword literal prefix to look for, including the trailing space
 * @param string $urlmsg  name of the message holding the URL pattern; "$1" in
 *                        it is replaced by the number
 * @return string the processed text
 */
function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl' ) {
	$valid = '0123456789';
	$internal = false;

	# $keyword is echoed back literally below, so split on it literally too
	# (split() would have interpreted it as a regular expression).
	$a = explode( $keyword, ' '.$text );
	if ( count ( $a ) < 2 ) {
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1);

	/* Check if the first keyword is preceded by [[.
	 * This test is made here because the array_shift above
	 * prevents it from being done inside the foreach.
	 */
	if ( substr( $text, -2 ) == '[[' ) {
		$internal = true;
	}

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if ( $x == '' ) {
			$text .= $keyword;
			continue;
		}

		/** remove and save whitespaces in $blank */
		$len = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $len );
		$x = (string)substr( $x, $len );

		/** remove and save the rfc number in $id.
		 *  strspn() stops cleanly when the token is exhausted. */
		$len = strspn( $x, $valid );
		$id = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		if ( $id == '' ) {
			/* call back stripped spaces*/
			$text .= $keyword.$blank.$x;
		} elseif( $internal ) {
			/* inside [[...]]: leave it for the normal link handling */
			$text .= $keyword.$id.$x;
		} else {
			/* build the external link*/
			$url = wfmsg( $urlmsg );
			$url = str_replace( '$1', $id, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
			$text .= "<a href='{$url}'{$la}>{$keyword}{$id}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = ( substr($x,-2) == '[[' );
	}
	return $text;
}
2776 * Transform wiki markup when saving a page by doing \r\n -> \n
2777 * conversion, substituting signatures, {{subst:}} templates, etc.
2779 * @param string $text the text to transform
2780 * @param Title &$title the Title object for the current article
2781 * @param User &$user the User object describing the current user
2782 * @param ParserOptions $options parsing options
2783 * @param bool $clearState whether to clear the parser state first
2784 * @return string the altered wiki markup
/**
 * Prepare wiki markup for saving: normalise line endings, then run the
 * pre-save pass (pstPass2) on everything outside stripped sections.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	# \r\n -> \n
	$text = str_replace( "\r\n", "\n", $text );

	# Take protected sections out, transform the rest, put them back
	$state = false;
	$text = $this->strip( $text, $state, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $state );
	return $this->unstripNoWiki( $text, $state );
}
2809 * Pre-save transform helper function
/**
 * Pre-save transform helper: expands {{subst:}} tags, replaces the ~~~,
 * ~~~~ and ~~~~~ signature markers and completes "pipe trick" links.
 *
 * @param string $text the wiki markup being saved
 * @param User &$user the user whose name/nickname is used for signatures
 * @return string the transformed markup
 */
function pstPass2( $text, &$user ) {
	global $wgContLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ='.$wgLocaltimezone );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Put back the TZ value saved above
		putenv( 'TZ='.$oldtz );
	}

	# Literal markers, longest first. str_replace() keeps "$1" or "\1" in a
	# nickname from being read as a regex back-reference.
	$userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2876 * Set up some variables which are usually set up in parse()
2877 * so that an external function can call some class members with confidence
/**
 * Initialise the members that parse() normally sets up, so that external
 * code can call individual parser methods.
 *
 * @param Title &$title title to parse for
 * @param ParserOptions $options parsing options
 * @param int $outputType one of the OT_* constants
 * @param bool $clearState whether to reset the parser state
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2890 * Transform a MediaWiki message by replacing magic variables.
2892 * @param string $text the text to transform
2893 * @param ParserOptions $options options
2894 * @return string the text with variables substituted
/**
 * Expand the magic variables in a MediaWiki message.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options options
 * @return string the text with variables substituted; returned unchanged
 *                when called re-entrantly
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( !$executing ) {
		$executing = true;

		$this->mOutputType = OT_MSG;
		$this->mOptions = $options;
		$this->mTitle = $wgTitle;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
2918 * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2919 * Callback will be called with the text within
2920 * Transform and return the text within
/**
 * Register a callback for an HTML-style extension tag such as
 * <yourtag>special text</yourtag>.
 *
 * @param string $tag tag name
 * @param mixed $callback callback stored for that tag
 * @return mixed the callback previously registered for the tag, or null
 */
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2930 * Replace <!--LINK--> link placeholders with actual links, in the buffer
2931 * Placeholders created in Skin::makeLinkObj()
2932 * Returns an array of links found, indexed by PDBK:
2936 * $options is a bit field, RLH_FOR_UPDATE to select for update
/**
 * Replace the <!--LINK n--> and <!--IWLINK n--> placeholders in the buffer
 * with real links, checking page existence with a single batched query.
 *
 * @param string &$text buffer to work on, modified in place
 * @param int $options bit field; RLH_FOR_UPDATE adds FOR UPDATE to the query
 * @return array link state indexed by prefixed DB key:
 *               0 = broken, 1 = known, 2 = stub
 */
function replaceLinkHolders( &$text, $options = 0 ) {
	global $wgUser, $wgLinkCache, $wgUseOldExistenceCheck, $wgLinkHolders;
	global $wgInterwikiLinkHolders;
	global $outputReplace;

	if ( $wgUseOldExistenceCheck ) {
		return array();
	}

	$fname = 'Parser::replaceLinkHolders';
	wfProfileIn( $fname );

	$pdbks = array();
	$colours = array();

	#if ( !empty( $tmpLinks[0] ) ) { #TODO
	if ( !empty( $wgLinkHolders['namespaces'] ) ) {
		wfProfileIn( $fname.'-check' );
		$dbr =& wfGetDB( DB_SLAVE );
		$cur = $dbr->tableName( 'cur' );
		$sk = $wgUser->getSkin();
		$threshold = $wgUser->getOption('stubthreshold');

		# Sort by namespace, so titles of one namespace share one IN(...) list
		asort( $wgLinkHolders['namespaces'] );

		# Generate query
		$query = false;
		foreach ( $wgLinkHolders['namespaces'] as $key => $val ) {
			# Make title object
			$title = $wgLinkHolders['titles'][$key];

			# Skip invalid entries.
			# Result will be ugly, but prevents crash.
			if ( is_null( $title ) ) {
				continue;
			}
			$pdbk = $pdbks[$key] = $title->getPrefixedDBkey();

			# Check if it's in the link cache already
			if ( $wgLinkCache->getGoodLinkID( $pdbk ) ) {
				$colours[$pdbk] = 1;
			} elseif ( $wgLinkCache->isBadLink( $pdbk ) ) {
				$colours[$pdbk] = 0;
			} else {
				# Not in the link cache, add it to the query
				if ( !isset( $current ) ) {
					$current = $val;
					$query = "SELECT cur_id, cur_namespace, cur_title";
					if ( $threshold > 0 ) {
						$query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect";
					}
					$query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN(";
				} elseif ( $current != $val ) {
					$current = $val;
					$query .= ")) OR (cur_namespace=$val AND cur_title IN(";
				} else {
					$query .= ', ';
				}

				$query .= $dbr->addQuotes( $wgLinkHolders['dbkeys'][$key] );
			}
		}
		if ( $query ) {
			$query .= '))';
			if ( $options & RLH_FOR_UPDATE ) {
				$query .= ' FOR UPDATE';
			}

			$res = $dbr->query( $query, $fname );

			# Fetch data and form into an associative array
			# non-existent = broken
			# 1 = known
			# 2 = stub
			while ( $s = $dbr->fetchObject($res) ) {
				$title = Title::makeTitle( $s->cur_namespace, $s->cur_title );
				$pdbk = $title->getPrefixedDBkey();
				$wgLinkCache->addGoodLink( $s->cur_id, $pdbk );

				if ( $threshold > 0 ) {
					$size = $s->cur_len;
					# A stub is a main-namespace, non-redirect page shorter
					# than the threshold. The old test read an undefined
					# $length and was inverted, so nothing was ever a stub.
					if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $size >= $threshold ) {
						$colours[$pdbk] = 1;
					} else {
						$colours[$pdbk] = 2;
					}
				} else {
					$colours[$pdbk] = 1;
				}
			}
		}
		wfProfileOut( $fname.'-check' );

		# Construct search and replace arrays
		wfProfileIn( $fname.'-construct' );
		$outputReplace = array();
		foreach ( $wgLinkHolders['namespaces'] as $key => $ns ) {
			$title = $wgLinkHolders['titles'][$key];
			# Invalid entries were skipped above and have no $pdbks entry;
			# they get no replacement instead of a call on a null title.
			if ( is_null( $title ) ) {
				continue;
			}
			$pdbk = $pdbks[$key];
			$searchkey = '<!--LINK '.$key.'-->';
			if ( empty( $colours[$pdbk] ) ) {
				$wgLinkCache->addBadLink( $pdbk );
				$colours[$pdbk] = 0;
				$outputReplace[$searchkey] = $sk->makeBrokenLinkObj( $title,
					$wgLinkHolders['texts'][$key],
					$wgLinkHolders['queries'][$key] );
			} elseif ( $colours[$pdbk] == 1 ) {
				$outputReplace[$searchkey] = $sk->makeKnownLinkObj( $title,
					$wgLinkHolders['texts'][$key],
					$wgLinkHolders['queries'][$key] );
			} elseif ( $colours[$pdbk] == 2 ) {
				$outputReplace[$searchkey] = $sk->makeStubLinkObj( $title,
					$wgLinkHolders['texts'][$key],
					$wgLinkHolders['queries'][$key] );
			}
		}
		wfProfileOut( $fname.'-construct' );

		# Do the thing
		wfProfileIn( $fname.'-replace' );

		$text = preg_replace_callback(
			'/(<!--LINK .*?-->)/',
			"outputReplaceMatches",
			$text );
		wfProfileOut( $fname.'-replace' );
	}

	if ( !empty( $wgInterwikiLinkHolders ) ) {
		wfProfileIn( $fname.'-interwiki' );
		# Interwiki links are already rendered; the placeholder number
		# indexes straight into the holder array
		$outputReplace = $wgInterwikiLinkHolders;
		$text = preg_replace_callback(
			'/<!--IWLINK (.*?)-->/',
			"outputReplaceMatches",
			$text );
		wfProfileOut( $fname.'-interwiki' );
	}

	wfProfileOut( $fname );
	return $colours;
}
3082 * Renders an image gallery from a text with one line per image.
3083 * text labels may be given by using |-style alternative text. E.g.
3084 * Image:one.jpg|The number "1"
3085 * Image:tree.jpg|A tree
3086 * given as text will return the HTML of a gallery with two images,
3087 * labeled 'The number "1"' and
/**
 * Render an image gallery from text holding one image per line. A label
 * can follow the image name after a pipe, e.g.
 *    Image:one.jpg|The number "1"
 *    Image:tree.jpg|A tree
 *
 * @param string $text gallery definition, one image per line
 * @return string HTML of the gallery
 */
function renderImageGallery( $text ) {
	global $wgLinkCache;
	$ig = new ImageGallery();
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$lines = explode( "\n", $text );

	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}
		$nt = Title::newFromURL( $matches[1] );
		if ( is_null( $nt ) ) {
			# Bogus title. Ignore it rather than handing null to
			# Image::newFromTitle() and the link cache below.
			continue;
		}
		if ( isset( $matches[3] ) ) {
			$label = $matches[3];
		} else {
			$label = '';
		}

		# FIXME: Use the full wiki parser and add its links
		# to the page's links.
		$html = $this->mOptions->mSkin->formatComment( $label );

		$ig->add( Image::newFromTitle( $nt ), $html );
		$wgLinkCache->addImageLinkObj( $nt );
	}
	return $ig->toHTML();
}
3125 * @package MediaWiki
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache
	var $mVersion;   # Compatibility check

	/**
	 * @param string $text rendered text
	 * @param array $languageLinks list of interlanguage links
	 * @param array $categoryLinks category links, stored as array keys
	 * @param bool $containsOldMagic value reported by containsOldMagic()
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
		$this->mVersion = MW_PARSER_VERSION;
	}

	function getText()                   { return $this->mText; }
	function getLanguageLinks()          { return $this->mLanguageLinks; }
	# Categories are kept as array keys; hand out just the names
	function getCategoryLinks()          { return array_keys( $this->mCategoryLinks ); }
	function getCacheTime()              { return $this->mCacheTime; }
	function containsOldMagic()          { return $this->mContainsOldMagic; }
	function setText( $text )            { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll )     { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }
	function addCategoryLink( $c )       { $this->mCategoryLinks[$c] = 1; }

	/**
	 * Fold the links and the old-magic flag of another output object into
	 * this one.
	 *
	 * @param ParserOutput $other the object to merge in
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Take the categories from $other (this used to merge in our own
		# language links). Array union keeps the keys, which ARE the category
		# names; array_merge() would renumber all-digit names such as "2005".
		$this->mCategoryLinks = $this->mCategoryLinks + $other->mCategoryLinks;
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

	/**
	 * Return true if this cached output object predates the global or
	 * per-article cache invalidation timestamps, or if it comes from
	 * an incompatible older version.
	 *
	 * @param string $touched the affected article's last touched timestamp
	 * @return bool
	 */
	function expired( $touched ) {
		global $wgCacheEpoch;
		return $this->getCacheTime() <= $touched ||
			$this->getCacheTime() <= $wgCacheEpoch ||
			!isset( $this->mVersion ) ||
			version_compare( $this->mVersion, MW_PARSER_VERSION, "lt" );
	}
}
3181 * Set options of the Parser
3183 * @package MediaWiki
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors. Each setter goes through wfSetVar() and returns its result.

	function getUseTeX() {
		return $this->mUseTeX;
	}
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function getSkin() {
		return $this->mSkin;
	}
	# The skin is stored by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function getEditSection() {
		return $this->mEditSection;
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function getShowToc() {
		return $this->mShowToc;
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Get parser options
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	# Site-wide switches come from globals, display preferences from the
	# user; a falsy $userInput is replaced by a fresh User object.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
		$fname = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $fname );

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		wfProfileIn( $fname.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $fname.'-skin' );

		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );

		wfProfileOut( $fname );
	}
}
3260 * Callback function used by Parser::replaceLinkHolders()
3261 * to substitute link placeholders.
/**
 * preg_replace_callback() handler for Parser::replaceLinkHolders(): looks up
 * the first captured group in the global $outputReplace table.
 *
 * @param array $matches regex match; element 1 is the lookup key
 * @return string the replacement stored for that key
 */
function &outputReplaceMatches( $matches ) {
	global $outputReplace;
	$key = $matches[1];
	return $outputReplace[$key];
}
3269 * Return the total number of articles
/**
 * Return the total number of articles, loading the site statistics first.
 *
 * @return int value of $wgNumberOfArticles after wfLoadSiteStats() has run
 */
function wfNumberOfArticles() {
	global $wgNumberOfArticles;

	# Fills in $wgNumberOfArticles
	wfLoadSiteStats();

	return $wgNumberOfArticles;
}
3279 * Get various statistics from the database
/**
 * Load the view, edit and article counters from the site_stats table into
 * their globals. Does nothing when $wgNumberOfArticles is no longer -1 or
 * when the statistics row cannot be read.
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
	$fname = 'wfLoadSiteStats';

	# -1 marks "not loaded yet"
	if ( -1 != $wgNumberOfArticles ) {
		return;
	}

	$dbr =& wfGetDB( DB_SLAVE );
	$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conds = array( 'ss_row_id' => 1 );
	$row = $dbr->selectRow( 'site_stats', $fields, $conds, $fname );

	if ( $row === false ) {
		return;
	}

	$wgTotalViews = $row->ss_total_views;
	$wgTotalEdits = $row->ss_total_edits;
	$wgNumberOfArticles = $row->ss_good_articles;
}
3302 function wfEscapeHTMLTagsOnly( $in ) {
3304 array( '"', '>', '<' ),
3305 array( '"', '>', '<' ),