3 // require_once('Tokenizer.php');
8 * Processes wiki markup
10 * There are two main entry points into the Parser class:
12 * produces HTML output
14 * produces altered wiki markup.
17 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
19 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
22 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
23 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
26 * * only within ParserOptions
32 * Variable substitution O(N^2) attack
34 * Without countermeasures, it would be possible to attack the parser by saving
35 * a page filled with a large number of inclusions of large pages. The size of
36 * the generated page would be proportional to the square of the input size.
37 * Hence, we limit the number of inclusions of any given page, thus bringing any
38 * attack back to O(N).
# Limits used against the inclusion attack described in the header comment.
# NOTE(review): neither constant is referenced in the visible part of the
# file; presumably REPEAT caps inclusions of one page and SIZE caps the
# included text length -- confirm against replaceVariables().
40 define( 'MAX_INCLUDE_REPEAT', 100 );
41 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
43 # Allowed values for $mOutputType
44 define( 'OT_HTML', 1 );
45 define( 'OT_WIKI', 2 );
46 define( 'OT_MSG' , 3 );
48 # String passed as the $tag parameter of extractTags() to make it
49 # strip HTML comments instead of regular
50 # <XML>-style tags. This should not be anything we
51 # may want to use in wikisyntax
52 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
54 # Prefix of the unique markers generated by strip() and insertStripItem()
55 define( 'UNIQ_PREFIX', 'NaodW29');
57 # Constants needed for external link processing
58 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
59 define( 'HTTP_PROTOCOLS', 'http|https' );
60 # Everything except right bracket, space, or control characters
61 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
# The complement of the class above: right bracket, space and control characters
62 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Link text: like the URL class, but a space (\x20) is allowed
64 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
65 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
66 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed external link "[url text]".
# Capture groups: 1 = whole URL, 2 = protocol, 3 = link text (may be empty).
67 define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS
.'):'.EXT_LINK_URL_CLASS
.'+) *('.EXT_LINK_TEXT_CLASS
.'*?)\]/S' );
# URL of an external image.
# NOTE(review): the first group expands to (http|https:), i.e. "http" OR
# "https:"; plain http URLs still match only because ':' is also accepted by
# EXT_LINK_URL_CLASS in the second group. '(?:http|https):' was presumably
# intended -- confirm before relying on group 1.
# NOTE(review): the closing ");" of this define() is not visible in this view.
68 define( 'EXT_IMAGE_REGEX',
69 '/^('.HTTP_PROTOCOLS
.':)'. # Protocol
70 '('.EXT_LINK_URL_CLASS
.'+)\\/'. # Hostname and path
71 '('.EXT_IMAGE_FNAME_CLASS
.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS
.')$/S' # Filename
83 # Cleared with clearState():
84 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
85 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
88 var $mOptions, $mTitle, $mOutputType,
89 $mTemplates, // cache of already loaded templates, avoids
90 // multiple SQL queries for the same string
91 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
92 // in this path. Used for loop detection.
95 $this->mTemplates
= array();
96 $this->mTemplatePath
= array();
97 $this->mTagHooks
= array();
function clearState() {
	# Fresh output container for the next parse.
	$this->mOutput = new ParserOutput;

	# Array-valued state.
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();

	# Scalar counters and flags.
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	$this->mVariables = false;
	$this->mInPre = false;
}
113 # First pass--just handle <nowiki> sections, pass the rest off
114 # to internalParse() which does all the real work.
116 # Returns a ParserOutput
118 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
# NOTE(review): several lines of this function are not visible in this view
# (how $clearState is used, the "$fixtags = array(" openers and the condition
# choosing between the two tables); comments below cover visible code only.
120 $fname = 'Parser::parse';
121 wfProfileIn( $fname );
127 $this->mOptions
= $options;
128 $this->mTitle
=& $title;
129 $this->mOutputType
= OT_HTML
;
# Pipeline: strip protected sections, parse the wiki markup, restore
# everything except the nowiki/html sections (those are restored below,
# after doBlockLevels()).
132 $text = $this->strip( $text, $this->mStripState
);
133 $text = $this->internalParse( $text, $linestart );
134 $text = $this->unstrip( $text, $this->mStripState
);
135 # Clean up special characters, only run once, next-to-last before doBlockLevels
# First clean-up table (pattern => replacement).
138 # french spaces, last one Guillemet-left
139 # only if there is something before the space
# \302\273 is the UTF-8 byte pair of the right guillemet, \302\253 of the left.
# NOTE(review): the replacement uses \2 but the pattern has a single capturing
# group (the lookahead does not capture), so \2 is always empty. As shown
# here the replacement contains an ordinary space, making the rule a no-op;
# presumably a non-breaking-space entity was lost -- confirm.
140 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1 \\2',
141 # french spaces, Guillemet-right
142 "/(\\302\\253) /i"=>"\\1 ",
143 '/<hr *>/i' => '<hr />',
144 '/<br *>/i' => '<br />',
145 '/<center *>/i' => '<div class="center">',
146 '/<\\/center *>/i' => '</div>',
147 # Clean up spare ampersands; note that we probably ought to be
148 # more careful about named entities.
# NOTE(review): "(?!:amp;" is a negative lookahead for the literal ":amp;"
# (the ':' is part of the alternative), "[0-9A-fa-f]" spans A..f rather than
# hex digits only, and the replacement '&' is identical to the match as shown
# here; presumably "(?!amp;", "[0-9A-Fa-f]" and an escaped ampersand were
# intended -- confirm.
149 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
151 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
# Second, shorter clean-up table.
154 # french spaces, last one Guillemet-left
155 '/ (\\?|:|;|!|\\302\\273)/i' => ' \\1',
156 # french spaces, Guillemet-right
157 '/(\\302\\253) /i' => '\\1 ',
158 '/<center *>/i' => '<div class="center">',
159 '/<\\/center *>/i' => '</div>'
161 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
164 $text = $this->doBlockLevels( $text, $linestart );
165 $text = $this->unstripNoWiki( $text, $this->mStripState
);
167 $text = $this->tidy($text);
# The finished HTML is stored in, and returned as, the ParserOutput object.
169 $this->mOutput
->setText( $text );
170 wfProfileOut( $fname );
171 return $this->mOutput
;
/* static */ function getRandomString() {
	# Two independent draws from 0..0x7fffffff, each rendered as lowercase
	# hex without padding and concatenated (so the length varies).
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
178 # Replaces all occurrences of <$tag>content</$tag> in the text
179 # with a random marker and returns the new text. the output parameter
180 # $content will be an associative array filled with data on the form
181 # $unique_marker => content.
183 # If $content is already set, the additional entries will be appended
185 # If $tag is set to STRIP_COMMENTS, the function will extract
186 # <!-- HTML comments -->
188 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ''){
# NOTE(review): the initialisation of $n and $stripped, the else/closing
# lines and the return statement are not visible in this view.
# Stem shared by all markers of this call: prefix, '-', tag name, random hex.
189 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
# Consume $text piece by piece until nothing is left.
196 while ( '' != $text ) {
# Split once at the next opening delimiter.
# NOTE(review): $tag is interpolated into the regex without preg_quote();
# this assumes tag names never contain regex metacharacters -- confirm for
# the hook names registered in mTagHooks.
197 if($tag==STRIP_COMMENTS
) {
198 $p = preg_split( '/<!--/i', $text, 2 );
200 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
# No opening delimiter found, or nothing follows it.
203 if ( ( count( $p ) < 2 ) ||
( '' == $p[1] ) ) {
# Split the remainder once at the matching closing delimiter.
206 if($tag==STRIP_COMMENTS
) {
207 $q = preg_split( '/-->/i', $p[1], 2 );
209 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
# Marker = stem + counter as 8 upper-case hex digits; remember the inner text.
211 $marker = $rnd . sprintf('%08X', $n++
);
212 $content[$marker] = $q[0];
213 $stripped .= $marker;
220 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
221 # If $render is set, performs necessary rendering operations on plugins
222 # Returns the text, and fills an array with data needed in unstrip()
223 # If the $state is already a valid strip state, it adds to the state
225 # When $stripcomments is set, HTML comments <!-- like this -->
226 # will be stripped in addition to other tags. This is important
227 # for section editing, where these comments cause confusion when
228 # counting the sections in the wikisource
229 function strip( $text, &$state, $stripcomments = false ) {
# NOTE(review): the if/else lines that choose between the two assignments in
# each loop below, the test on $stripcomments, and the return statement are
# not visible in this view; presumably $render selects the first assignment
# of each pair -- confirm.
230 $render = ($this->mOutputType
== OT_HTML
);
# One marker => content map per tag type.
231 $html_content = array();
232 $nowiki_content = array();
233 $math_content = array();
234 $pre_content = array();
235 $comment_content = array();
236 $ext_content = array();
238 # Replace any instances of the placeholders
239 $uniq_prefix = UNIQ_PREFIX
;
240 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
# <html> sections are only extracted when both globals are enabled.
243 global $wgRawHtml, $wgWhitelistEdit;
244 if( $wgRawHtml && $wgWhitelistEdit ) {
245 $text = Parser
::extractTags('html', $text, $html_content, $uniq_prefix);
246 foreach( $html_content as $marker => $content ) {
248 # Raw and unchecked for validity.
249 $html_content[$marker] = $content;
251 $html_content[$marker] = '<html>'.$content.'</html>';
# <nowiki>
257 $text = Parser
::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
258 foreach( $nowiki_content as $marker => $content ) {
260 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
262 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
# <math>
267 $text = Parser
::extractTags('math', $text, $math_content, $uniq_prefix);
268 foreach( $math_content as $marker => $content ){
270 if( $this->mOptions
->getUseTeX() ) {
271 $math_content[$marker] = renderMath( $content );
# NOTE(review): the second tag lacks its '/': this emits "<math>...<math>",
# whereas the assignment just below emits "</math>". Looks like a typo --
# confirm and fix.
273 $math_content[$marker] = '<math>'.$content.'<math>';
276 $math_content[$marker] = '<math>'.$content.'</math>';
# <pre>
281 $text = Parser
::extractTags('pre', $text, $pre_content, $uniq_prefix);
282 foreach( $pre_content as $marker => $content ){
284 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
286 $pre_content[$marker] = '<pre>'.$content.'</pre>';
# HTML comments: the delimiters removed by extractTags() are put back.
292 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
293 foreach( $comment_content as $marker => $content ){
294 $comment_content[$marker] = '<!--'.$content.'-->';
# Extension tags registered in mTagHooks (tag name => callback).
299 foreach ( $this->mTagHooks
as $tag => $callback ) {
# NOTE(review): this writes to $ext_contents (plural), a different variable
# from the $ext_content array used everywhere else, so $ext_content[$tag] is
# never initialised here; it only becomes an array if extractTags() stores a
# match. Presumably "$ext_content[$tag] = array();" was intended -- confirm.
300 $ext_contents[$tag] = array();
301 $text = Parser
::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
302 foreach( $ext_content[$tag] as $marker => $content ) {
304 $ext_content[$tag][$marker] = $callback( $content );
306 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
311 # Merge state with the pre-existing state, if there is one
# Array union (+): entries already present in $state win over new ones.
313 $state['html'] = $state['html'] +
$html_content;
314 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
315 $state['math'] = $state['math'] +
$math_content;
316 $state['pre'] = $state['pre'] +
$pre_content;
317 $state['comment'] = $state['comment'] +
$comment_content;
319 foreach( $ext_content as $tag => $array ) {
320 if ( array_key_exists( $tag, $state ) ) {
321 $state[$tag] = $state[$tag] +
$array;
# Fresh state built from this call's maps.
326 'html' => $html_content,
327 'nowiki' => $nowiki_content,
328 'math' => $math_content,
329 'pre' => $pre_content,
330 'comment' => $comment_content,
336 # always call unstripNoWiki() after this one
function unstrip( $text, &$state ) {
	# Walk the tag types from last to first, and within each type the
	# markers from last to first, so that nested markers are expanded
	# correctly. The 'nowiki' and 'html' entries are left untouched here.
	for ( $group = end( $state ); $group !== false; $group = prev( $state ) ) {
		$tagName = key( $state );
		if ( $tagName == 'nowiki' || $tagName == 'html' ) {
			continue;
		}
		for ( $original = end( $group ); $original !== false; $original = prev( $group ) ) {
			$text = str_replace( key( $group ), $original, $text );
		}
	}
	return $text;
}
350 # always call this after unstrip() to preserve the order
351 function unstripNoWiki( $text, &$state ) {
# NOTE(review): the lines between the two loops, and the return statement,
# are not visible in this view; the second loop may be conditional.
352 # Must expand in reverse order, otherwise nested tags will be corrupted
# Each loop stops at the first stored value that is exactly false.
353 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
354 $text = str_replace( key( $state['nowiki'] ), $content, $text );
# Same expansion for the raw <html> sections.
359 for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
360 $text = str_replace( key( $state['html'] ), $content, $text );
367 # Add an item to the strip state
368 # Returns the unique tag which must be inserted into the stripped text
369 # The tag will be replaced with the original text in unstrip()
370 function insertStripItem( $text, &$state ) {
# Marker: UNIQ_PREFIX, '-item' and a random hex string.
371 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
# NOTE(review): the lines between the marker and the assignment below, and
# the return statement, are not visible in this view.
# File the text under the 'item' type of the strip state.
380 $state['item'][$rnd] = $text;
384 # Return allowed HTML attributes
function getHTMLattrs () {
	# Allowed attributes--no scripting, etc.
	# Order and duplicates ('width' occurs twice) are kept as they were.
	return array(
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor',
		'clear',                                   /* BR */
		'noshade',                                 /* HR */
		'cite',                                    /* BLOCKQUOTE, Q */
		'size', 'face', 'color',                   /* FONT */
		/* For various lists, mostly deprecated but safe */
		'type', 'start', 'value', 'compact',
		/* Tables */
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		/* For CSS */
		'id', 'class', 'name', 'style'
	);
}
401 # Remove non approved attributes and javascript in css
402 function fixTagAttributes ( $t ) {
# NOTE(review): the function calls wrapping the patterns below and the
# return statement are not visible in this view.
403 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
404 $htmlattrs = $this->getHTMLattrs() ;
406 # Strip non-approved attributes from the tag
# Pattern groups: 1 = attribute name, 3 = value (bare or double-quoted).
# The replacement is PHP code: keep name (and "=value" when a value exists)
# if the lower-cased name is in $htmlattrs, otherwise emit nothing.
# NOTE(review): the /e (eval) modifier was deprecated in PHP 5.5 and removed
# in PHP 7, and it evaluates text built from user markup; a
# preg_replace_callback() rewrite is needed on current PHP.
408 '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
409 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
412 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
414 # Strip javascript "expression" from stylesheets. Brute force approach:
415 # If anything offensive is found, all attributes of the HTML tag are dropped
# "tps*:\/\/" matches "tp", any number of "s", then "://" (so it covers
# http://, https:// and ftp://).
418 '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
419 wfMungeToUtf8( $t ) ) )
427 # interface with html tidy, used if $wgUseTidy = true
428 function tidy ( $text ) {
# NOTE(review): the case labels of the switch, the initialisation of
# $cleansource, the pipe clean-up and the success return are not visible in
# this view.
429 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
430 global $wgInputEncoding, $wgOutputEncoding;
431 $fname = 'Parser::tidy';
432 wfProfileIn( $fname );
# Choose tidy's encoding flag from the output encoding.
# NOTE(review): the flag is appended (.=) to the *global* $wgTidyOpts, so
# every call in the same request adds another copy -- confirm this is
# harmless or build the options in a local variable.
435 switch(strtoupper($wgOutputEncoding)) {
437 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
440 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
443 $wgTidyOpts .= ' -raw';
# Wrap the fragment in a complete XHTML document before piping it to tidy.
446 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
447 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
448 '<head><title>test</title></head><body>'.$text.'</body></html>';
# stdin and stdout are pipes; stderr is appended to /dev/null
# (NOTE(review): that path is Unix-specific).
449 $descriptorspec = array(
450 0 => array('pipe', 'r'),
451 1 => array('pipe', 'w'),
452 2 => array('file', '/dev/null', 'a')
# NOTE(review): the command line is assembled from configuration globals
# without escapeshellarg(); safe only while those values are trusted.
454 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
455 if (is_resource($process)) {
456 fwrite($pipes[0], $wrappedtext);
# Read tidy's output in chunks of up to 1023 bytes until end of stream.
458 while (!feof($pipes[1])) {
459 $cleansource .= fgets($pipes[1], 1024);
462 $return_value = proc_close($process);
465 wfProfileOut( $fname );
# Empty output for non-empty input is treated as failure: return the
# original text with a marker comment appended.
467 if( $cleansource == '' && $text != '') {
468 wfDebug( "Tidy error detected!\n" );
469 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
475 # parse the wiki syntax used to render tables
476 function doTableStuff ( $t ) {
# NOTE(review): many brace, assignment and return lines of this function are
# not visible in this view; comments below cover visible statements only.
477 $fname = 'Parser::doTableStuff';
478 wfProfileIn( $fname );
# Work line by line; the four arrays are parallel stacks with one entry per
# currently open (possibly nested) table.
480 $t = explode ( "\n" , $t ) ;
481 $td = array () ; # Is currently a td tag open?
482 $ltd = array () ; # Was it TD or TH?
483 $tr = array () ; # Is currently a tr tag open?
484 $ltr = array () ; # tr attributes
485 $indent_level = 0; # indent level of the table
486 foreach ( $t AS $k => $x )
489 $fc = substr ( $x , 0 , 1 ) ;
# "{|" optionally preceded by colons opens a table; each colon adds one
# <dl><dd> level of indentation.
490 if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
491 $indent_level = strlen( $matches[1] );
493 str_repeat( '<dl><dd>', $indent_level ) .
494 '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
495 array_push ( $td , false ) ;
496 array_push ( $ltd , '' ) ;
497 array_push ( $tr , false ) ;
498 array_push ( $ltr , '' ) ;
500 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
# "|}" closes the innermost table, closing any open cell and row first.
501 else if ( '|}' == substr ( $x , 0 , 2 ) ) {
503 $l = array_pop ( $ltd ) ;
504 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
505 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
507 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
# "|-" starts a new row; the rest of the line becomes the row attributes.
509 else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
510 $x = substr ( $x , 1 ) ;
511 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
513 $l = array_pop ( $ltd ) ;
514 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
515 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
518 array_push ( $tr , false ) ;
519 array_push ( $td , false ) ;
520 array_push ( $ltd , '' ) ;
521 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
# Cell lines: "|" data cell, "!" header cell, "|+" caption.
523 else if ( '|' == $fc ||
'!' == $fc ||
'|+' == substr ( $x , 0 , 2 ) ) { # Caption
524 if ( '|+' == substr ( $x , 0 , 2 ) ) {
526 $x = substr ( $x , 1 ) ;
528 $after = substr ( $x , 1 ) ;
# "!!" is accepted as a cell separator on header lines.
529 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
530 $after = explode ( '||' , $after ) ;
532 foreach ( $after AS $theline )
# Open a row if none is open, using the attributes saved by "|-".
537 $tra = array_pop ( $ltr ) ;
538 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
539 array_push ( $tr , true ) ;
540 array_push ( $ltr , '' ) ;
543 $l = array_pop ( $ltd ) ;
544 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
545 if ( $fc == '|' ) $l = 'td' ;
546 else if ( $fc == '!' ) $l = 'th' ;
# NOTE(review): $fc is only ever '|' or '!' in the visible code, so this arm
# needs $fc to be set to '+' on a line not shown here -- confirm.
547 else if ( $fc == '+' ) $l = 'caption' ;
549 array_push ( $ltd , $l ) ;
# Text before the first "|" inside a cell is its attribute list.
550 $y = explode ( '|' , $theline , 2 ) ;
551 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
# NOTE(review): "$y = $y =" is a redundant double assignment.
552 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
554 array_push ( $td , true ) ;
559 # Closing open td, tr && table
# NOTE(review): this always emits '</td>', even when the open cell was a th
# or caption (the branches above use the tag name saved in $ltd) -- confirm.
560 while ( count ( $td ) > 0 )
562 if ( array_pop ( $td ) ) $t[] = '</td>' ;
563 if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
567 $t = implode ( "\n" , $t ) ;
568 # $t = $this->removeHTMLtags( $t );
569 wfProfileOut( $fname );
573 # Parses the text and adds the result to the strip state
574 # Returns the strip tag
function stripParse( $text, $newline, $args ) {
	# 1. Move protected sections out of the way.
	$stripped = $this->strip( $text, $this->mStripState );
	# 2. Parse as a non-main fragment; a non-empty $newline means the text
	#    starts at the beginning of a line.
	$rendered = $this->internalParse( $stripped, (bool)$newline, $args, false );
	# 3. Park the rendered result in the strip state and hand back its marker,
	#    prefixed with whatever $newline contained.
	$marker = $this->insertStripItem( $rendered, $this->mStripState );
	return $newline . $marker;
}
581 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
# Runs the individual markup passes in a fixed order.
# NOTE(review): the global declaration for $wgLang, some conditional lines
# and the return statement are not visible in this view.
584 $fname = 'Parser::internalParse';
585 wfProfileIn( $fname );
587 $text = $this->removeHTMLtags( $text );
588 $text = $this->replaceVariables( $text, $args );
590 $text = $wgLang->convert($text);
# A line starting with four or more dashes becomes a horizontal rule.
592 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
594 $text = $this->doHeadings( $text );
595 if($this->mOptions
->getUseDynamicDates()) {
596 global $wgDateFormatter;
597 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
599 $text = $this->doAllQuotes( $text );
# External links are handled before internal ones (see the note above
# replaceExternalLinks()).
600 $text = $this->replaceExternalLinks( $text );
601 $text = $this->doMagicLinks( $text );
# NOTE(review): replaceInternalLinks() is called twice in a row; presumably
# the second pass picks up links nested inside the first pass's output --
# confirm that this is intentional.
602 $text = $this->replaceInternalLinks ( $text );
603 $text = $this->replaceInternalLinks ( $text );
605 $text = $this->unstrip( $text, $this->mStripState
);
606 $text = $this->unstripNoWiki( $text, $this->mStripState
);
608 $text = $this->doTableStuff( $text );
609 $text = $this->formatHeadings( $text, $isMain );
610 $sk =& $this->mOptions
->getSkin();
611 $text = $sk->transformContent( $text );
613 wfProfileOut( $fname );
/* private */ function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	# ISBN first, then (only when geo mode is switched on) GEO, then RFC.
	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
627 # Parse ^^ tokens and return html
/* private */ function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );
	# Text between a pair of ^^ markers becomes small superscript. The match
	# is greedy, so on a line with several pairs the first and the last
	# marker are the ones that delimit it.
	$converted = preg_replace( '/\^\^(.*)\^\^/', '<small><sup>\\1</sup></small>', $text );
	wfProfileOut( $fname );
	return $converted;
}
636 # Parse headers and return html
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	# Deepest level first: trying "=" before "==" would turn "== x ==" into
	# a level-1 heading containing "= x =".
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$marks}(.+){$marks}(\\s|$)/m",
			"<h{$level}>\\1</h{$level}>\\2",
			$text
		);
		--$level;
	}

	wfProfileOut( $fname );
	return $text;
}
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	# Quote mark-up never spans lines, so each line is converted on its own
	# and the lines are then joined back with single newlines.
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( $line );
	}
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
662 /* private */ function doQuotes( $text ) {
# Converts runs of apostrophes in one line into <i>/<b> mark-up.
# NOTE(review): loop headers, counter initialisations, several branch lines
# and the return statements are not visible in this view.
# Split on runs of two or more apostrophes, keeping the runs: even indexes
# of $arr hold text, odd indexes hold apostrophe runs.
663 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
# A single element means the line contains no such run.
664 if (count ($arr) == 1)
668 # First, do some preliminary work. This may shift some apostrophes from
669 # being mark-up to being text. It also counts the number of occurrences
670 # of bold and italics mark-ups.
678 # If there are ever four apostrophes, assume the first is supposed to
679 # be text, and the remaining three constitute mark-up for bold text.
680 if (strlen ($arr[$i]) == 4)
685 # If there are more than 5 apostrophes in a row, assume they're all
686 # text except for the last 5.
687 else if (strlen ($arr[$i]) > 5)
689 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
692 # Count the number of occurrences of bold and italics mark-ups.
693 # We are not counting sequences of five apostrophes.
# NOTE(review): the code below does count a run of five, once as italics and
# once as bold, which leaves the parity test further down unaffected only if
# both counters are checked together -- the comment above looks outdated.
694 if (strlen ($arr[$i]) == 2) $numitalics++
; else
695 if (strlen ($arr[$i]) == 3) $numbold++
; else
696 if (strlen ($arr[$i]) == 5) { $numitalics++
; $numbold++
; }
701 # If there is an odd number of both bold and italics, it is likely
702 # that one of the bold ones was meant to be an apostrophe followed
703 # by italics. Which one we cannot know for certain, but it is more
704 # likely to be one that has a single-letter word before it.
705 if (($numbold %
2 == 1) && ($numitalics %
2 == 1))
708 $firstsingleletterword = -1;
709 $firstmultiletterword = -1;
# Only bold runs (odd index, length 3) are candidates.
713 if (($i %
2 == 1) and (strlen ($r) == 3))
# $x1 / $x2: last and second-to-last character of the text before the run.
715 $x1 = substr ($arr[$i-1], -1);
716 $x2 = substr ($arr[$i-1], -2, 1);
718 if ($firstspace == -1) $firstspace = $i;
719 } else if ($x2 == ' ') {
720 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
722 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
# The chosen bold run is rewritten as italics and its first apostrophe is
# appended to the preceding text.
728 # If there is a single-letter word, use it!
729 if ($firstsingleletterword > -1)
731 $arr [ $firstsingleletterword ] = "''";
732 $arr [ $firstsingleletterword-1 ] .= "'";
734 # If not, but there's a multi-letter word, use that one.
735 else if ($firstmultiletterword > -1)
737 $arr [ $firstmultiletterword ] = "''";
738 $arr [ $firstmultiletterword-1 ] .= "'";
740 # ... otherwise use the first one that has neither.
741 # (notice that it is possible for all three to be -1 if, for example,
742 # there is only one quintuple-apostrophe in the line)
743 else if ($firstspace > -1)
745 $arr [ $firstspace ] = "''";
746 $arr [ $firstspace-1 ] .= "'";
750 # Now let's actually convert our apostrophic mush to HTML!
# $state records the open tags in opening order: '', 'i', 'b', 'ib', 'bi',
# or 'both' (a run of five was seen and the nesting order is not yet known;
# the text since then is held in $buffer).
759 if ($state == 'both')
# Run of two: italics toggle.
766 if (strlen ($r) == 2)
769 { $output .= '</i>'; $state = ''; }
770 else if ($state == 'bi')
771 { $output .= '</i>'; $state = 'b'; }
772 else if ($state == 'ib')
773 { $output .= '</b></i><b>'; $state = 'b'; }
774 else if ($state == 'both')
775 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
776 else # $state can be 'b' or ''
777 { $output .= '<i>'; $state .= 'i'; }
# Run of three: bold toggle.
779 else if (strlen ($r) == 3)
782 { $output .= '</b>'; $state = ''; }
783 else if ($state == 'bi')
784 { $output .= '</i></b><i>'; $state = 'i'; }
785 else if ($state == 'ib')
786 { $output .= '</b>'; $state = 'i'; }
787 else if ($state == 'both')
788 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
789 else # $state can be 'i' or ''
790 { $output .= '<b>'; $state .= 'b'; }
# Run of five: toggles both at once.
792 else if (strlen ($r) == 5)
795 { $output .= '</b><i>'; $state = 'i'; }
796 else if ($state == 'i')
797 { $output .= '</i><b>'; $state = 'b'; }
798 else if ($state == 'bi')
799 { $output .= '</i></b>'; $state = ''; }
800 else if ($state == 'ib')
801 { $output .= '</b></i>'; $state = ''; }
802 else if ($state == 'both')
803 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
804 else # ($state == '')
805 { $buffer = ''; $state = 'both'; }
810 # Now close all remaining tags. Notice that the order is important.
811 if ($state == 'b' ||
$state == 'ib')
813 if ($state == 'i' ||
$state == 'bi' ||
$state == 'ib')
817 if ($state == 'both')
818 $output .= '<b><i>'.$buffer.'</i></b>';
823 # Note: we have to do external links before the internal ones,
824 # and otherwise take great care in the order of things here, so
825 # that we don't end up interpreting some URLs twice.
827 /* private */ function replaceExternalLinks( $text ) {
# NOTE(review): the initialisation of $i, $url, $dtrail and $paren, several
# branch lines and the return statement are not visible in this view.
828 $fname = 'Parser::replaceExternalLinks';
829 wfProfileIn( $fname );
831 $sk =& $this->mOptions
->getSkin();
832 $linktrail = wfMsg('linktrail');
# Split on bracketed links, keeping the captured groups: the first element is
# the text before the first link; each link then contributes its three
# capture groups followed by the text up to the next link.
833 $bits = preg_split( EXT_LINK_BRACKETED
, $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
# Text before the first bracketed link may still contain free URLs.
835 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
838 while ( $i<count( $bits ) ) {
840 $protocol = $bits[$i++
];
842 $trail = $bits[$i++
];
844 # If the link text is an image URL, replace it with an <img> tag
845 # This happened by accident in the original parser, but some people used it extensively
846 $img = $this->maybeMakeImageLink( $text );
847 if ( $img !== false ) {
853 # No link text, e.g. [http://domain.tld/some.link]
855 # Autonumber if allowed
# NOTE(review): strpos() on the string 'http|https' is a substring test, so
# fragments such as 'tp' or '|' would also pass; fine only while $protocol
# is always one of URL_PROTOCOLS.
856 if ( strpos( HTTP_PROTOCOLS
, $protocol ) !== false ) {
857 $text = '[' . ++
$this->mAutonumber
. ']';
859 # Otherwise just use the URL
860 $text = htmlspecialchars( $url );
863 # Have link text, e.g. [http://domain.tld/some.link text]s
865 if ( preg_match( $linktrail, $trail, $m2 ) ) {
871 $encUrl = htmlspecialchars( $url );
872 # Bit in parentheses showing the URL for the printable version
# NOTE(review): preg_quote() is given '/' as delimiter although the pattern
# is delimited by '!', and $protocol is inserted unquoted -- confirm.
873 if( $url == $text ||
preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
876 # Expand the URL for printable version
877 if ( ! $sk->suppressUrlExpansion() ) {
# NOTE(review): $encUrl is already the htmlspecialchars() form of $url, so
# it is escaped twice here ("&" would be shown as "&amp;") -- confirm.
878 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
884 # Process the trail (i.e. everything after this link up until start of the next link),
885 # replacing any non-bracketed links
886 $trail = $this->replaceFreeExternalLinks( $trail );
888 $la = $sk->getExternalLinkAttributes( $url, $text );
890 # Use the encoded URL
891 # This means that users can paste URLs directly into the text
892 # Funny characters like ö aren't valid in URLs anyway
893 # This was changed in August 2004
# NOTE(review): despite the comment above, the href is built from $url, not
# from $encUrl; unless $url is re-assigned on a line not shown here, the
# attribute value is unescaped -- confirm.
894 $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
897 wfProfileOut( $fname );
901 # Replace anything that looks like a URL with a link
902 function replaceFreeExternalLinks( $text ) {
# NOTE(review): the initialisation of $i, $trail and $sep, some branch lines
# and the return statement are not visible in this view.
# Split on "protocol:" occurrences, keeping them: after the leading text,
# elements alternate between a protocol (with colon) and what follows it.
903 $bits = preg_split( '/((?:'.URL_PROTOCOLS
.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
904 $s = array_shift( $bits );
907 $sk =& $this->mOptions
->getSkin();
909 while ( $i < count( $bits ) ){
910 $protocol = $bits[$i++
];
911 $remainder = $bits[$i++
];
# $m[1] = longest run of URL characters, $m[2] = everything after it.
913 if ( preg_match( '/^('.EXT_LINK_URL_CLASS
.'+)(.*)$/s', $remainder, $m ) ) {
914 # Found some characters after the protocol that look promising
915 $url = $protocol . $m[1];
918 # Move trailing punctuation to $trail
920 # If there is no left bracket, then consider right brackets fair game too
921 if ( strpos( $url, '(' ) === false ) {
# Count how many characters at the end of the URL belong to $sep.
925 $numSepChars = strspn( strrev( $url ), $sep );
926 if ( $numSepChars ) {
927 $trail = substr( $url, -$numSepChars ) . $trail;
928 $url = substr( $url, 0, -$numSepChars );
931 # Replace & from obsolete syntax with &
# NOTE(review): as written, search and replacement are both '&', so this
# call changes nothing; presumably the search string was the escaped
# ampersand entity and got decoded somewhere -- confirm.
932 $url = str_replace( '&', '&', $url );
934 # Is this an external image?
935 $text = $this->maybeMakeImageLink( $url );
936 if ( $text === false ) {
937 # Not an image, make a link
938 $text = $sk->makeExternalLink( $url, $url );
940 $s .= $text . $trail;
# No URL characters follow the protocol: emit the pieces unchanged.
942 $s .= $protocol . $remainder;
948 # make an image if it's allowed
function maybeMakeImageLink( $url ) {
	# Returns the image mark-up for $url, or false when external images are
	# disabled or $url does not look like an image URL.
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}
	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
961 # The wikilinks [[ ]] are processed here.
962 /* private */ function replaceInternalLinks( $s ) {
# NOTE(review): many lines of this function (static declarations of $tc and
# $e1, assignments of $text/$trail/$prefix, continue statements, closing
# braces and the return) are not visible in this view.
963 global $wgLang, $wgLinkCache;
964 global $wgNamespacesWithSubpages;
965 static $fname = 'Parser::replaceInternalLinks' ;
966 wfProfileIn( $fname );
968 wfProfileIn( $fname.'-setup' );
970 # the % is needed to support urlencoded titles as well
971 if ( !$tc ) { $tc = Title
::legalChars() . '#%'; }
972 $sk =& $this->mOptions
->getSkin();
974 $redirect = MagicWord
::get ( MAG_REDIRECT
) ;
# Split at every "[["; a space is prepended so that the first element is
# never empty, and removed again from the leading text below.
976 $a = explode( '[[', ' ' . $s );
977 $s = array_shift( $a );
978 $s = substr( $s, 1 );
980 # Match a link having the form [[namespace:link|alternate]]trail
# Groups: 1 = link target, 2 = optional text after "|", 3 = rest of the piece.
982 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
983 # Match the end of a line for a word that's not followed by whitespace,
984 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
985 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
987 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
988 # Special and Media are pseudo-namespaces; no pages actually exist in them
# NOTE(review): "Namespace" is a reserved word from PHP 5.3 on, so this
# static call does not parse on current PHP.
990 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
992 if ( $useLinkPrefixExtension ) {
993 if ( preg_match( $e2, $s, $m ) ) {
994 $first_prefix = $m[2];
997 $first_prefix = false;
1003 wfProfileOut( $fname.'-setup' );
1005 # start processing each piece (one per "[[" found)
1006 foreach ( $a as $line ) {
1007 wfProfileIn( $fname.'-prefixhandling' );
1008 if ( $useLinkPrefixExtension ) {
1009 if ( preg_match( $e2, $s, $m ) ) {
1017 $prefix = $first_prefix;
1018 $first_prefix = false;
1021 wfProfileOut( $fname.'-prefixhandling' );
1023 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1025 # fix up urlencoded title texts
1026 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1028 } else { # Invalid form; output directly
1029 $s .= $prefix . '[[' . $line ;
1035 # :Foobar -- override special treatment of prefix (images, language links)
1036 # /Foobar -- convert to CurrentPage/Foobar
1037 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1039 # Look at the first character
1040 $c = substr($m[1],0,1);
1041 $noforce = ($c != ':');
1045 # / at end means we don't want the slash to be shown
1046 if(substr($m[1],-1,1)=='/') {
1047 $m[1]=substr($m[1],1,strlen($m[1])-2);
1050 $noslash=substr($m[1],1);
1053 # Some namespaces don't allow subpages
1054 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) {
1055 # subpages allowed here
1056 $link = $this->mTitle
->getPrefixedText(). '/' . trim($noslash);
1059 } # this might be changed for ugliness reasons
1061 # no subpage allowed, use standard link
1065 } elseif( $noforce ) { # no subpage
1068 # We don't want to keep the first character
1069 $link = substr( $m[1], 1 );
# Without explicit link text the target itself is displayed.
1072 $wasblank = ( '' == $text );
1073 if( $wasblank ) $text = $link;
1075 $nt = Title
::newFromText( $link );
1077 $s .= $prefix . '[[' . $line;
1081 $ns = $nt->getNamespace();
1082 $iw = $nt->getInterWiki();
1084 # Link not escaped by : , create the various objects
# Interlanguage link: recorded in the output object instead of being
# rendered inline.
1088 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1089 array_push( $this->mOutput
->mLanguageLinks
, $nt->getFullText() );
1090 $tmp = $prefix . $trail ;
1091 $s .= (trim($tmp) == '')?
'': $tmp;
1095 if ( $ns == NS_IMAGE
) {
1096 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1097 $wgLinkCache->addImageLinkObj( $nt );
# Category link: rendered into the output's category list, not inline.
1101 if ( $ns == NS_CATEGORY
) {
1102 $t = $nt->getText() ;
1103 $nnt = Title
::newFromText ( Namespace::getCanonicalName(NS_CATEGORY
).':'.$t ) ;
1105 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1106 $pPLC=$sk->postParseLinkColour();
1107 $sk->postParseLinkColour( false );
1108 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1109 $sk->postParseLinkColour( $pPLC );
1110 $wgLinkCache->resume();
1113 if ( $this->mTitle
->getNamespace() == NS_CATEGORY
) {
1114 $sortkey = $this->mTitle
->getText();
1116 $sortkey = $this->mTitle
->getPrefixedText();
1121 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1122 $this->mOutput
->mCategoryLinks
[] = $t ;
1123 $s .= $prefix . $trail ;
1128 if( ( $nt->getPrefixedText() === $this->mTitle
->getPrefixedText() ) &&
1129 ( strpos( $link, '#' ) === FALSE ) ) {
1130 # Self-links are handled specially; generally de-link and change to bold.
1131 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1135 if( $ns == NS_MEDIA
) {
1136 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1137 $wgLinkCache->addImageLinkObj( $nt );
1139 } elseif( $ns == NS_SPECIAL
) {
1140 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
# Everything else: an ordinary page link.
1143 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1145 wfProfileOut( $fname );
1149 # Some functions here used by doBlockLevels()
/* private */ function closeParagraph() {
	# Closing tag (plus newline) for the section that is currently open, or
	# an empty string when none is open.
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';

	# Either way no section, and no <pre>, is open afterwards.
	$this->mInPre = false;
	$this->mLastSection = '';

	return $closing;
}
1160 # getCommon() returns the length of the longest common substring
1161 # of both arguments, starting at the beginning of both.
/* private */ function getCommon( $st1, $st2 ) {
	# Returns the number of leading bytes that $st1 and $st2 have in common
	# (0 when either string is empty).

	# Only positions that exist in both strings can match.
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		# Square-bracket offsets: the curly form $str{$i} is deprecated in
		# PHP 7.4 and a parse error in PHP 8.
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
1173 # These next three functions open, continue, and close the list
1174 # element appropriate to the prefix character passed into them.
/* private */ function openList( $char ) {
	# An open paragraph has to be closed before a list can start.
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$html .= '<ul><li>';
			break;
		case '#':
			$html .= '<ol><li>';
			break;
		case ':':
			$html .= '<dl><dd>';
			break;
		case ';':
			$html .= '<dl><dt>';
			# Remember that a definition term is open
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix character: the paragraph close is discarded
			$html = '<!-- ERR 1 -->';
	}

	return $html;
}
/* private */ function nextItem( $char ) {
	# Bulleted and numbered lists simply start a new <li>
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}

	# Anything that is not a definition-list prefix is an error
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	# Definition lists: close the open <dt> or <dd>, then open the element
	# matching the prefix and record whether a term is now open.
	$close = $this->mDTopen ? '</dt>' : '</dd>';
	$opensTerm = ( ';' == $char );
	$this->mDTopen = $opensTerm;

	return $close . ( $opensTerm ? '<dt>' : '<dd>' );
}
/* private */ function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$text = '</li></ul>';
			break;
		case '#':
			$text = '</li></ol>';
			break;
		case ':':
			# A definition list ends with either an open term or an open
			# definition; close whichever one it is.
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = '</dt></dl>';
			} else {
				$text = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}

	return $text."\n";
}
# Turn line-oriented wiki markup into block-level HTML.
#
# The text is split on newlines and walked line by line while tracking the
# list prefix of the previous line ($lastPrefix), whether we are inside <pre>
# ($this->mInPre), the currently open section ($this->mLastSection) and a
# pending paragraph opener ($paragraphStack).
#
# @param string $text      text to process
# @param bool   $linestart when false, the first line is copied to the output
#                          without any block-level processing
1222 /* private */ function doBlockLevels( $text, $linestart ) {
1223 $fname = 'Parser::doBlockLevels';
1224 wfProfileIn( $fname );
1226 # Parsing through the text line by line. The main thing
1227 # happening here is handling of block-level elements p, pre,
1228 # and making lists from lines starting with * # : etc.
1230 $textLines = explode( "\n", $text );
1232 $lastPrefix = $output = $lastLine = '';
1233 $this->mDTopen
= $inBlockElem = false;
1235 $paragraphStack = false;
1237 if ( !$linestart ) {
1238 $output .= array_shift( $textLines );
1240 foreach ( $textLines as $oLine ) {
1241 $lastPrefixLength = strlen( $lastPrefix );
1242 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1243 $preOpenMatch = preg_match('/<pre/i', $oLine );
1244 if ( !$this->mInPre
) {
1245 # Multiple prefixes may abut each other for nested lists.
1246 $prefixLength = strspn( $oLine, '*#:;' );
1247 $pref = substr( $oLine, 0, $prefixLength );
# $pref2 is $pref with ';' folded into ':'; it is the form compared with,
# and later stored as, $lastPrefix.
1250 $pref2 = str_replace( ';', ':', $pref );
1251 $t = substr( $oLine, $prefixLength );
1252 $this->mInPre
= !empty($preOpenMatch);
1254 # Don't interpret any other prefixes in preformatted text
1256 $pref = $pref2 = '';
1261 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1262 # Same as the last item, so no need to deal with nesting or opening stuff
1263 $output .= $this->nextItem( substr( $pref, -1 ) );
1264 $paragraphStack = false;
1266 if ( substr( $pref, -1 ) == ';') {
1267 # The one nasty exception: definition lists work like this:
1268 # ; title : definition text
1269 # So we check for : in the remainder text to split up the
1270 # title and definition, without b0rking links.
1271 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1272 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1274 $output .= $term . $this->nextItem( ':' );
1278 } elseif( $prefixLength ||
$lastPrefixLength ) {
1279 # Either open or close a level...
1280 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1281 $paragraphStack = false;
# Close the levels of the previous line that this line does not share.
1283 while( $commonPrefixLength < $lastPrefixLength ) {
1284 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1285 --$lastPrefixLength;
1287 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1288 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open the levels this line adds beyond the shared prefix.
1290 while ( $prefixLength > $commonPrefixLength ) {
1291 $char = substr( $pref, $commonPrefixLength, 1 );
1292 $output .= $this->openList( $char );
1294 if ( ';' == $char ) {
1295 # FIXME: This is dupe of code above
1296 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1298 $output .= $term . $this->nextItem( ':' );
1302 ++
$commonPrefixLength;
1304 $lastPrefix = $pref2;
1306 if( 0 == $prefixLength ) {
1307 # No prefix (not in list)--go to paragraph mode
1308 $uniq_prefix = UNIQ_PREFIX
;
1309 // XXX: use a stack for nestable elements like span, table and div
1310 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1311 $closematch = preg_match(
1312 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1313 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1314 if ( $openmatch or $closematch ) {
1315 $paragraphStack = false;
1316 $output .= $this->closeParagraph();
1317 if($preOpenMatch and !$preCloseMatch) {
1318 $this->mInPre
= true;
1320 if ( $closematch ) {
1321 $inBlockElem = false;
1323 $inBlockElem = true;
1325 } else if ( !$inBlockElem && !$this->mInPre
) {
# A line starting with a space is preformatted text (an all-blank line
# only counts while a <pre> section is already open).
1326 if ( ' ' == $t{0} and ( $this->mLastSection
== 'pre' or trim($t) != '' ) ) {
1328 if ($this->mLastSection
!= 'pre') {
1329 $paragraphStack = false;
1330 $output .= $this->closeParagraph().'<pre>';
1331 $this->mLastSection
= 'pre';
1333 $t = substr( $t, 1 );
1336 if ( '' == trim($t) ) {
1337 if ( $paragraphStack ) {
1338 $output .= $paragraphStack.'<br />';
1339 $paragraphStack = false;
1340 $this->mLastSection
= 'p';
1342 if ($this->mLastSection
!= 'p' ) {
1343 $output .= $this->closeParagraph();
1344 $this->mLastSection
= '';
1345 $paragraphStack = '<p>';
1347 $paragraphStack = '</p><p>';
1351 if ( $paragraphStack ) {
1352 $output .= $paragraphStack;
1353 $paragraphStack = false;
1354 $this->mLastSection
= 'p';
1355 } else if ($this->mLastSection
!= 'p') {
1356 $output .= $this->closeParagraph().'<p>';
1357 $this->mLastSection
= 'p';
1363 if ($paragraphStack === false) {
# NOTE(review): presumably runs once the line loop has finished, closing
# every list level that is still open - confirm against the full file.
1367 while ( $prefixLength ) {
1368 $output .= $this->closeList( $pref2{$prefixLength-1} );
# Close a paragraph / pre section that is still open.
1371 if ( '' != $this->mLastSection
) {
1372 $output .= '</' . $this->mLastSection
. '>';
1373 $this->mLastSection
= '';
1376 wfProfileOut( $fname );
1380 # Return value of a magic variable (like PAGENAME)
#
# @param mixed $index magic word ID; compared against the MAG_* constants below
# @return string the value for that variable. Date-based values are read from
#         date() at call time, page-based ones from $this->mTitle.
# NOTE(review): several case labels are not visible in this view; the
# title/namespace returns below presumably belong to the page-name and
# namespace variables - confirm.
1381 function getVariableValue( $index ) {
1382 global $wgLang, $wgSitename, $wgServer;
1385 case MAG_CURRENTMONTH
:
1386 return $wgLang->formatNum( date( 'm' ) );
1387 case MAG_CURRENTMONTHNAME
:
1388 return $wgLang->getMonthName( date('n') );
1389 case MAG_CURRENTMONTHNAMEGEN
:
1390 return $wgLang->getMonthNameGen( date('n') );
1391 case MAG_CURRENTDAY
:
1392 return $wgLang->formatNum( date('j') );
1394 return $this->mTitle
->getText();
1396 return $this->mTitle
->getPartialURL();
1398 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1399 return $wgLang->getNsText($this->mTitle
->getNamespace()); # Patch by Dori
1400 case MAG_CURRENTDAYNAME
:
# date('w') is 0-based (0 = Sunday); the +1 shifts it to a 1-based index
1401 return $wgLang->getWeekdayName( date('w')+
1 );
1402 case MAG_CURRENTYEAR
:
1403 return $wgLang->formatNum( date( 'Y' ) );
1404 case MAG_CURRENTTIME
:
1405 return $wgLang->time( wfTimestampNow(), false );
1406 case MAG_NUMBEROFARTICLES
:
1407 return $wgLang->formatNum( wfNumberOfArticles() );
# Initialise the magic variables (like CURRENTMONTHNAME): rebuild
# $this->mVariables from scratch, registering the current value of every
# variable ID listed in $wgVariableIDs.
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$value = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $value );
	}
}
# Expand {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# @param string $text wiki text to process
# @param array  $args template arguments in effect for this (recursive) call
# @return string the processed text; oversized input is returned unchanged
/* private */ function replaceVariables( $text, $args = array() ) {
	global $wgLang, $wgScript, $wgArticlePath;

	# Prevent too big inclusions
	if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();
	# Same character class, minus the braces themselves
	$nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );

	# This function is called recursively, so the arguments of the current
	# level are kept on a stack for the substitution callbacks.
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	# Variable substitution (HTML and message output only)
	if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_MSG ) {
		$variablePattern = "/{{([$nonBraceChars]*?)}}/";
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );
	}

	# Argument substitution (HTML output only)
	if ( $this->mOutputType == OT_HTML ) {
		$argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution (every output type)
	$templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
# Replace one {{variable}} match with its value.
#
# @param array $matches regex match: [0] is the full text, [1] the name
# @return string the variable's value, or the original text if unknown
function variableSubstitution( $matches ) {
	# Build the variable table lazily on first use
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		# Not a known variable: leave the source text untouched
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
# Split template arguments
#
# @param string $argsString the raw argument part of a template call; when
#                           non-empty its first character (the leading '|')
#                           is dropped before splitting
# @return array list of argument strings
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$args = explode( '|', substr( $argsString, 1 ) );

	# If any of the arguments contains a '[[' but no ']]', it needs to be
	# merged with the next arg because the '|' character between belongs
	# to the link syntax and not the template parameter syntax.
	$remaining = count( $args );
	$pos = 0;
	while ( $pos < $remaining - 1 ) {
		$opened = substr_count( $args[$pos], '[[' );
		$closed = substr_count( $args[$pos], ']]' );
		if ( $opened == $closed ) {
			++$pos;
			continue;
		}
		# Unbalanced: glue the following piece back on and look at the
		# merged argument again on the next pass.
		$args[$pos] .= '|' . $args[$pos + 1];
		array_splice( $args, $pos + 1, 1 );
		--$remaining;
	}

	return $args;
}
# Handle one {{...}} match.
#
# $matches[1] is an optional newline before the braces, $matches[2] the part
# before the first '|' and $matches[3] the raw argument string (see the
# assignments below). The visible stages, in order: SUBST handling,
# MSG/MSGNW/INT prefixes, NS: expansion, LOCALURL/LOCALURLE, internal
# variables, GRAMMAR, the template cache with its loop test, loading the
# template from the database, and finally escaping or recursive parsing
# for HTML output.
# NOTE(review): several statements (e.g. where $found, $nowiki and $noparse
# are set) are not visible in this view.
1504 function braceSubstitution( $matches ) {
1505 global $wgLinkCache, $wgLang;
1506 $fname = 'Parser::braceSubstitution';
1513 # $newline is an optional newline character before the braces
1514 # $part1 is the bit before the first |, and must contain only title characters
1515 # $args is a list of arguments, starting from index 0, not including $part1
1517 $newline = $matches[1];
1518 $part1 = $matches[2];
1519 # If the third subpattern matched anything, it will start with |
1521 $args = $this->getTemplateArgs($matches[3]);
1522 $argc = count( $args );
# A match that still contains a triple brace is left as it is
1525 if ( strpos( $matches[0], '{{{' ) !== false ) {
1526 $text = $matches[0];
1533 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1534 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1535 if ( $this->mOutputType
!= OT_WIKI
) {
1536 # Invalid SUBST not replaced at PST time
1537 # Return without further processing
1538 $text = $matches[0];
1542 } elseif ( $this->mOutputType
== OT_WIKI
) {
1543 # SUBST not found in PST pass, do nothing
1544 $text = $matches[0];
1549 # MSG, MSGNW and INT
1552 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1553 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1556 # Remove obsolete MSG:
1557 $mwMsg =& MagicWord
::get( MAG_MSG
);
1558 $mwMsg->matchStartAndRemove( $part1 );
1561 # Check if it is an internal message
1562 $mwInt =& MagicWord
::get( MAG_INT
);
1563 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1564 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1565 $text = wfMsgReal( $part1, $args, true );
1573 # Check for NS: (namespace expansion)
1574 $mwNs = MagicWord
::get( MAG_NS
);
1575 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero number is used as a namespace index directly; anything else is
# looked up as a canonical namespace name
1576 if ( intval( $part1 ) ) {
1577 $text = $wgLang->getNsText( intval( $part1 ) );
1580 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1581 if ( !is_null( $index ) ) {
1582 $text = $wgLang->getNsText( $index );
1589 # LOCALURL and LOCALURLE
1591 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1592 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
1594 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1595 $func = 'getLocalURL';
1596 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1597 $func = 'escapeLocalURL';
1602 if ( $func !== '' ) {
1603 $title = Title
::newFromText( $part1 );
1604 if ( !is_null( $title ) ) {
1606 $text = $title->$func( $args[0] );
1608 $text = $title->$func();
1615 # Internal variables
1616 if ( !$this->mVariables
) {
1617 $this->initialiseVariables();
1619 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1620 $text = $this->mVariables
[$part1];
1622 $this->mOutput
->mContainsOldMagic
= true;
1626 if ( !$found && $argc == 1 ) {
1627 $mwGrammar =& MagicWord
::get( MAG_GRAMMAR
);
1628 if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1629 $text = $wgLang->convertGrammar( $args[0], $part1 );
1634 # Template table test
1636 # Did we encounter this template already? If yes, it is in the cache
1637 # and we need to check for loops.
1638 if ( isset( $this->mTemplates
[$part1] ) ) {
1639 # Infinite loop test
1640 if ( isset( $this->mTemplatePath
[$part1] ) ) {
1644 # set $text to cached message.
1645 $text = $this->mTemplates
[$part1];
1649 # Load from database
1651 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1652 if ( !is_null( $title ) && !$title->isExternal() ) {
1653 # Check for excessive inclusion
1654 $dbk = $title->getPrefixedDBkey();
1655 if ( $this->incrementIncludeCount( $dbk ) ) {
1656 # This should never be reached.
1657 $article = new Article( $title );
1658 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1659 if ( $articleContent !== false ) {
1661 $text = $articleContent;
1665 # If the title is valid but undisplayable, make a link to it
1666 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1667 $text = '[['.$title->getPrefixedText().']]';
1671 # Template cache array insertion
1672 $this->mTemplates
[$part1] = $text;
1676 # Recursive parsing, escaping and link table handling
1677 # Only for HTML output
1678 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1679 $text = wfEscapeWikiText( $text );
1680 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1681 # Clean up argument array
1682 $assocArgs = array();
# Arguments without '=' are stored under a running numeric index, the others
# as trimmed name => trimmed value
1684 foreach( $args as $arg ) {
1685 $eqpos = strpos( $arg, '=' );
1686 if ( $eqpos === false ) {
1687 $assocArgs[$index++
] = $arg;
1689 $name = trim( substr( $arg, 0, $eqpos ) );
1690 $value = trim( substr( $arg, $eqpos+
1 ) );
1691 if ( $value === false ) {
1694 if ( $name !== false ) {
1695 $assocArgs[$name] = $value;
1700 # Do not enter included links in link table
1701 if ( !is_null( $title ) ) {
1702 $wgLinkCache->suspend();
1705 # Add a new element to the template recursion path
1706 $this->mTemplatePath
[$part1] = 1;
1708 $text = $this->stripParse( $text, $newline, $assocArgs );
1710 # Resume the link cache and register the inclusion as a link
1711 if ( !is_null( $title ) ) {
1712 $wgLinkCache->resume();
1713 $wgLinkCache->addLinkObj( $title );
1717 # Empties the template path
1718 $this->mTemplatePath
= array();
# Triple brace replacement -- used for template arguments
#
# @param array $matches regex match: [0] full text, [1] optional leading
#                       newline, [2] argument name
# @return string the parsed argument value, or the original text when the
#                current template call supplies no such argument
function argSubstitution( $matches ) {
	$argName = trim( $matches[2] );
	# The arguments of the innermost template call are on top of the stack
	$currentArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $currentArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
# Returns true if the function is allowed to include this entity
#
# Every call counts as one more inclusion of $dbk; once the count exceeds
# MAX_INCLUDE_REPEAT the answer is false.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}

	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1754 # Cleans up HTML, removes dangerous tags and attributes
#
# The text is split on '<'; each piece is matched as a tag, and only tags in
# the whitelists below are re-emitted (with attributes passed through
# fixTagAttributes()). Everything else gets its '<' put back as plain text.
# Two passes are visible: one that tracks open tags on a stack, and a simpler
# one further down that does not.
# NOTE(review): the conditions selecting between the whitelists / the two
# passes ($wgUserHtml, $wgUseTidy presumably) are not visible in this view.
1755 /* private */ function removeHTMLtags( $text ) {
1756 global $wgUseTidy, $wgUserHtml;
1757 $fname = 'Parser::removeHTMLtags';
1758 wfProfileIn( $fname );
1761 $htmlpairs = array( # Tags that must be closed
1762 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1763 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1764 'strike', 'strong', 'tt', 'var', 'div', 'center',
1765 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1766 'ruby', 'rt' , 'rb' , 'rp', 'p'
1768 $htmlsingle = array(
1769 'br', 'hr', 'li', 'dt', 'dd'
1771 $htmlnest = array( # Tags that can be nested--??
1772 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1773 'dl', 'font', 'big', 'small', 'sub', 'sup'
1775 $tabletags = array( # Can only appear inside table
1779 $htmlpairs = array();
1780 $htmlsingle = array();
1781 $htmlnest = array();
1782 $tabletags = array();
# Table-only tags are treated as "single" tags, i.e. no closing tag is required
1785 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1786 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1788 $htmlattrs = $this->getHTMLattrs () ;
1790 # Remove HTML comments
1791 $text = preg_replace( '/(\\n *<!--.*--> *|<!--.*?-->)/sU', '', $text );
1793 $bits = explode( '<', $text );
1794 $text = array_shift( $bits );
# $tagstack holds the currently open tags; $tablestack saves the outer
# $tagstack whenever a <table> is entered
1796 $tagstack = array(); $tablestack = array();
1797 foreach ( $bits as $x ) {
1798 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1799 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1801 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1802 error_reporting( $prev );
1805 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1809 if ( ! in_array( $t, $htmlsingle ) &&
1810 ( $ot = @array_pop
( $tagstack ) ) != $t ) {
1811 @array_push
( $tagstack, $ot );
1814 if ( $t == 'table' ) {
1815 $tagstack = array_pop( $tablestack );
1820 # Keep track for later
1821 if ( in_array( $t, $tabletags ) &&
1822 ! in_array( 'table', $tagstack ) ) {
1824 } else if ( in_array( $t, $tagstack ) &&
1825 ! in_array ( $t , $htmlnest ) ) {
1827 } else if ( ! in_array( $t, $htmlsingle ) ) {
1828 if ( $t == 'table' ) {
1829 array_push( $tablestack, $tagstack );
1830 $tagstack = array();
1832 array_push( $tagstack, $t );
1834 # Strip non-approved attributes from the tag
1835 $newparams = $this->fixTagAttributes($params);
# NOTE(review): as written here this replaces '>' with itself; presumably the
# replacement was an HTML entity before this text was decoded - verify
1839 $rest = str_replace( '>', '>', $rest );
1840 $text .= "<$slash$t $newparams$brace$rest";
1844 $text .= '<' . str_replace( '>', '>', $x);
1846 # Close off any remaining tags
1847 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1849 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
1852 # this might be possible using tidy itself
1853 foreach ( $bits as $x ) {
1854 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1856 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1857 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1858 $newparams = $this->fixTagAttributes($params);
1859 $rest = str_replace( '>', '>', $rest );
1860 $text .= "<$slash$t $newparams$brace$rest";
1862 $text .= '<' . str_replace( '>', '>', $x);
1866 wfProfileOut( $fname );
1871 # This function accomplishes several tasks:
1872 # 1) Auto-number headings if that option is enabled
1873 # 2) Add an [edit] link to sections for logged in users who have enabled the option
1874 # 3) Add a Table of contents on the top for users who have enabled the option
1875 # 4) Auto-anchor headings
1877 # It loops through all headlines, collects the necessary data, then splits up the
1878 # string and re-inserts the newly formatted headlines.
#
# @param string $text   HTML text containing <h1>..<h6> headlines
# @param bool   $isMain the TOC is only put above the first headline when this
#                       is true (and __TOC__ did not force another place)
# @return string the text with the TOC placeholder replaced (see the final
#                MagicWord replace() call)
1879 /* private */ function formatHeadings( $text, $isMain=true ) {
1880 global $wgInputEncoding, $wgMaxTocLevel, $wgLang;
1882 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
1883 $doShowToc = $this->mOptions
->getShowToc();
1884 $forceTocHere = false;
1885 if( !$this->mTitle
->userCanEdit() ) {
1887 $rightClickHack = 0;
1889 $showEditLink = $this->mOptions
->getEditSection();
1890 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
1893 # Inhibit editsection links if requested in the page
1894 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1895 if( $esw->matchAndRemove( $text ) ) {
1898 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1900 $mw =& MagicWord
::get( MAG_NOTOC
);
1901 if( $mw->matchAndRemove( $text ) ) {
1905 # never add the TOC to the Main Page. This is an entry page that should not
1906 # be more than 1-2 screens large anyway
1907 if( $this->mTitle
->getPrefixedText() == wfMsg('mainpage') ) {
1911 # Get all headlines for numbering them and adding funky stuff like [edit]
1912 # links - this is for later, but we need the number of headlines right now
# $matches[1] = heading level, $matches[2] = rest of the opening tag up to
# and including '>', $matches[3] = headline content
1913 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
1915 # if there are fewer than 4 headlines in the article, do not show TOC
1916 if( $numMatches < 4 ) {
1920 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
1921 # override above conditions and always show TOC at that place
1922 $mw =& MagicWord
::get( MAG_TOC
);
1923 if ($mw->match( $text ) ) {
1925 $forceTocHere = true;
1927 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1928 # override above conditions and always show TOC above first header
1929 $mw =& MagicWord
::get( MAG_FORCETOC
);
1930 if ($mw->matchAndRemove( $text ) ) {
1937 # We need this to perform operations on the HTML
1938 $sk =& $this->mOptions
->getSkin();
1943 # Ugh .. the TOC should have neat indentation levels which can be
1944 # passed to the skin functions. These are determined here
1949 $sublevelCount = array();
1952 foreach( $matches[3] as $headline ) {
1955 $prevlevel = $level;
1957 $level = $matches[1][$headlineCount];
1958 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
1959 # reset when we enter a new level
1960 $sublevelCount[$level] = 0;
1961 $toc .= $sk->tocIndent( $level - $prevlevel );
1962 $toclevel +
= $level - $prevlevel;
1964 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
1965 # reset when we step back a level
1966 $sublevelCount[$level+
1]=0;
1967 $toc .= $sk->tocUnindent( $prevlevel - $level );
1968 $toclevel -= $prevlevel - $level;
1970 # count number of headlines for each level
1971 @$sublevelCount[$level]++
;
1972 if( $doNumberHeadings ||
$doShowToc ) {
1974 for( $i = 1; $i <= $level; $i++
) {
1975 if( !empty( $sublevelCount[$i] ) ) {
1979 $numbering .= $wgLang->formatNum( $sublevelCount[$i] );
1985 # The canonized header is a version of the header text safe to use for links
1986 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
# NOTE(review): the second call below is given $headline again, so the result
# of unstrip() on the line before is overwritten and never used. It looks
# like $canonized_headline was meant to be passed - confirm and fix.
1987 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
1988 $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState
);
1990 # Remove link placeholders by the link text.
1991 # <!--LINK namespace page_title link text with suffix-->
1993 # link text with suffix
1994 $canonized_headline = preg_replace( '/<!--LINK [0-9]* [^ ]* *(.*?)-->/','$1', $canonized_headline );
# Drop any remaining tags
1996 $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
1997 $tocline = trim( $canonized_headline );
1998 $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT
, $wgInputEncoding ) );
1999 $replacearray = array(
2003 $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2004 $refer[$headlineCount] = $canonized_headline;
2006 # count how many in assoc. array so we can track dupes in anchors
2007 @$refers[$canonized_headline]++
;
2008 $refcount[$headlineCount]=$refers[$canonized_headline];
2010 # Prepend the number to the heading text
2012 if( $doNumberHeadings ||
$doShowToc ) {
2013 $tocline = $numbering . ' ' . $tocline;
2015 # Don't number the heading if it is the only one (looks silly)
2016 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2017 # the two are different if the line contains a link
2018 $headline=$numbering . ' ' . $headline;
2022 # Create the anchor for linking from the TOC to the section
# Repeated headline texts get a numeric suffix (_2, _3, ...) so anchors stay unique
2023 $anchor = $canonized_headline;
2024 if($refcount[$headlineCount] > 1 ) {
2025 $anchor .= '_' . $refcount[$headlineCount];
2027 if( $doShowToc && ( !isset($wgMaxTocLevel) ||
$toclevel<$wgMaxTocLevel ) ) {
2028 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2030 if( $showEditLink ) {
2031 if ( empty( $head[$headlineCount] ) ) {
2032 $head[$headlineCount] = '';
2034 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
2037 # Add the edit section span
2038 if( $rightClickHack ) {
2039 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
2042 # give headline the correct <h#> tag
2043 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2049 $toclines = $headlineCount;
2050 $toc .= $sk->tocUnindent( $toclevel );
2051 $toc = $sk->tocTable( $toc );
2054 # split up and insert constructed headlines
2056 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2059 foreach( $blocks as $block ) {
2060 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2061 # This is the [edit] link that appears for the top block of text when
2062 # section editing is enabled
2064 # Disabled because it broke block formatting
2065 # For example, a bullet point in the top line
2066 # $full .= $sk->editSectionLink(0);
2069 if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2070 # Top anchor now in skin
2074 if( !empty( $head[$i] ) ) {
2080 $mw =& MagicWord
::get( MAG_TOC
);
2081 return $mw->replace( $toc, $full );
# Return an HTML link for the "ISBN 123456" text
#
# Every "ISBN " marker followed by an ISBN-like run (digits, '-', A-Z) is
# turned into a link to Special:Booksources. A marker that is not followed
# by a usable number is put back unchanged.
#
# @param string $text
# @return string
/* private */ function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# explode() rather than split(): the separator is a literal string and
	# split() no longer exists as of PHP 7.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Undo the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Blanks between the marker and the number. strspn() is safe on an
		# empty remainder, unlike looking at the first character.
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		if ( $x === '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Longest leading run of characters allowed in an ISBN
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( '-', '', $isbn );
		$num = str_replace( ' ', '', $num );

		if ( '' === $num ) {
			# No usable number: re-emit everything that was consumed
			# (including a run of bare hyphens) so no source text is lost.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
# Return an HTML link for the "GEO ..." text
#
# "GEO " markers followed by a coordinate string (characters 0-9 . + - :
# containing at least one ':') become links to Special:Geo. Anything else
# is put back unchanged.
#
# @param string $text
# @return string
/* private */ function magicGEO( $text ) {
	global $wgLang, $wgUseGeoMode;
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# These next five lines are only for the ~35000 U.S. Census Rambot pages...
	# NOTE(review): the two 'South' patterns also emit a '+' latitude, which
	# looks unintended - confirm before changing the generated text.
	$directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;

	# explode() rather than split(): the separator is a literal string and
	# split() no longer exists as of PHP 7.
	$a = explode( 'GEO ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Undo the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Blanks between the marker and the coordinates. strspn() is safe on
		# an empty remainder, unlike looking at the first character.
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Longest leading run of coordinate characters
		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( '+', '', $geo );
		$num = str_replace( ' ', '', $num );

		if ( '' === $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			# Not a coordinate pair: re-emit everything that was consumed
			# so that text such as "GEO 12 foo" keeps its "12".
			$text .= "GEO $blank$geo$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
# Return an HTML link for the "RFC 1234" text
#
# Every "RFC " marker followed by digits becomes an external link built from
# the 'rfcurl' message ($1 is replaced by the number). Markers without a
# number are put back unchanged.
#
# @param string $text
# @return string
/* private */ function magicRFC( $text ) {
	# explode() rather than split(): the separator is a literal string and
	# split() no longer exists as of PHP 7.
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	# Undo the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# Blanks between the marker and the number. strspn() is safe on an
		# empty remainder, unlike looking at the first character.
		$blankLen = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Longest leading run of digits
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( '' === $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
# Transform wiki text before it is saved.
#
# Records the options and title on the parser, switches the output type to
# OT_WIKI, then runs strip() -> pstPass2() -> unstrip() -> unstripNoWiki()
# over the text.
#
# @param string $text
# @param object $title      stored by reference in $this->mTitle
# @param object $user       handed on to pstPass2()
# @param object $options    stored in $this->mOptions
# @param bool   $clearState when true, clearState() is called first
2213 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2214 $this->mOptions
= $options;
2215 $this->mTitle
=& $title;
2216 $this->mOutputType
= OT_WIKI
;
2218 if ( $clearState ) {
2219 $this->clearState();
2222 $stripState = false;
2226 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
# NOTE(review): the definition of $pairs is not visible here, and it is
# unclear from this view whether the regex replacement below is live code or
# part of a disabled block - confirm against the full file.
2230 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2231 "/<br *?>/i" => "<br />",
2233 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2235 $text = $this->strip( $text, $stripState, false );
2236 $text = $this->pstPass2( $text, $user );
2237 $text = $this->unstrip( $text, $stripState );
2238 $text = $this->unstripNoWiki( $text, $stripState );
# Second pass of the pre-save transform: {{subst:}} expansion, signatures
# (~~~, ~~~~, ~~~~~) and context links ([[|name]], [[name (context)|]]).
#
# @param string $text
# @param object $user supplies the name and the 'nickname' option used in
#                     signatures
# @return string
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	/* Note: this is an ugly timezone hack for the European wikis */
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ='.$wgLocaltimezone );
	}
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the timezone saved above. This used to read the undefined
		# variable $oldtzs, which left TZ blank for the rest of the request.
		putenv( 'TZ='.$oldtz );
	}

	# Longest pattern first, so ~~~~~ is not consumed as ~~~~ plus ~
	$text = preg_replace( '/~~~~~/', $d, $text );
	$text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
	$text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]", $text );

	# Context links: [[|name]] and [[name (context)|]]
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Set up some variables which are usually set up in parse()
# so that an external function can call some class members with confidence
#
# @param object $title      stored by reference in $this->mTitle
# @param object $options    stored in $this->mOptions
# @param mixed  $outputType stored in $this->mOutputType
# @param bool   $clearState when true, clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mTitle =& $title;
	$this->mOptions = $options;
	$this->mOutputType = $outputType;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
# Expand variables and templates in an interface message.
#
# @param string $text    message text
# @param object $options stored in $this->mOptions
# @return string the transformed text; the input is returned untouched when
#                this is a re-entrant call
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	$this->mTitle = $wgTitle;
	$this->mOptions = $options;
	$this->mOutputType = OT_MSG;
	$this->clearState();
	$transformed = $this->replaceVariables( $text );

	$executing = false;
	return $transformed;
}
2340 # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2341 # Callback will be called with the text within
2342 # Transform and return the text within
# Register $callback in mTagHooks under the key $tag, remembering the entry it replaces.
2343 function setHook( $tag, $callback ) {
# The @ suppresses the notice raised when no entry exists for $tag yet.
2344 $oldVal = @$this->mTagHooks
[$tag];
2345 $this->mTagHooks
[$tag] = $callback;
# NOTE(review): $oldVal is presumably returned on a line not visible here — confirm.
2352 * @package MediaWiki
2356 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
2357 var $mCacheTime; # Used in ParserCache
# Initialise the output holder from the given text, link lists and old-magic
# flag. The cache time always starts as an empty string.
# NOTE(review): presumably the PHP 4-style constructor of the enclosing class — confirm.
function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
	$containsOldMagic = false )
{
	$this->mCacheTime = '';
	$this->mContainsOldMagic = $containsOldMagic;
	$this->mCategoryLinks = $categoryLinks;
	$this->mLanguageLinks = $languageLinks;
	$this->mText = $text;
}
# Accessor for mText.
function getText() {
	return $this->mText;
}
# Accessor for mLanguageLinks.
function getLanguageLinks() {
	return $this->mLanguageLinks;
}
# Accessor for mCategoryLinks.
function getCategoryLinks() {
	return $this->mCategoryLinks;
}
# Accessor for mCacheTime (the field is marked as used in ParserCache).
function getCacheTime() {
	return $this->mCacheTime;
}
# Accessor for the mContainsOldMagic flag.
function containsOldMagic() {
	return $this->mContainsOldMagic;
}
# Assign mText through wfSetVar() and hand back whatever wfSetVar() returns
# (presumably the previous value — confirm against wfSetVar).
function setText( $text ) {
	$result = wfSetVar( $this->mText, $text );
	return $result;
}
# Assign mLanguageLinks through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setLanguageLinks( $ll ) {
	$result = wfSetVar( $this->mLanguageLinks, $ll );
	return $result;
}
# Assign mCategoryLinks through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setCategoryLinks( $cl ) {
	$result = wfSetVar( $this->mCategoryLinks, $cl );
	return $result;
}
# Assign mContainsOldMagic through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setContainsOldMagic( $com ) {
	$result = wfSetVar( $this->mContainsOldMagic, $com );
	return $result;
}
# Assign mCacheTime through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setCacheTime( $t ) {
	$result = wfSetVar( $this->mCacheTime, $t );
	return $result;
}
# Fold the link data of another output object into this one.
#
# $other: object exposing mLanguageLinks, mCategoryLinks and mContainsOldMagic.
# Language links and category links are each appended with array_merge();
# the old-magic flag ends up true if it was set on either side. mText and
# mCacheTime are not touched.
function merge( $other ) {
	$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );

	# The second operand has to be $other's category links. Merging in our
	# own language links instead (as this used to do) dropped $other's
	# categories and leaked language links into the category list.
	$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );

	$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
}
2389 * Set options of the Parser
2391 * @package MediaWiki
2395 # All variables are private
2396 var $mUseTeX; # Use texvc to expand <math> tags
2397 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2398 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2399 var $mAllowExternalImages; # Allow external images inline
2400 var $mSkin; # Reference to the preferred skin
2401 var $mDateFormat; # Date format index
2402 var $mEditSection; # Create "edit section" links
2403 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2404 var $mNumberHeadings; # Automatically number headings
2405 var $mShowToc; # Show table of contents
# Whether texvc is used to expand <math> tags.
function getUseTeX() {
	return $this->mUseTeX;
}
# Whether $wgDateFormatter is used to format dates.
function getUseDynamicDates() {
	return $this->mUseDynamicDates;
}
# Whether interlanguage links are removed and returned in an array.
function getInterwikiMagic() {
	return $this->mInterwikiMagic;
}
# Whether external images may be shown inline.
function getAllowExternalImages() {
	return $this->mAllowExternalImages;
}
# The preferred skin held in mSkin.
function getSkin() {
	return $this->mSkin;
}
# The date format index.
function getDateFormat() {
	return $this->mDateFormat;
}
# Whether "edit section" links are created.
function getEditSection() {
	return $this->mEditSection;
}
# Whether JavaScript for editing a section on right click is generated.
function getEditSectionOnRightClick() {
	return $this->mEditSectionOnRightClick;
}
# Whether headings are numbered automatically.
function getNumberHeadings() {
	return $this->mNumberHeadings;
}
# Whether the table of contents is shown.
function getShowToc() {
	return $this->mShowToc;
}
# Assign mUseTeX through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setUseTeX( $x ) {
	$result = wfSetVar( $this->mUseTeX, $x );
	return $result;
}
# Assign mUseDynamicDates through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setUseDynamicDates( $x ) {
	$result = wfSetVar( $this->mUseDynamicDates, $x );
	return $result;
}
# Assign mInterwikiMagic through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setInterwikiMagic( $x ) {
	$result = wfSetVar( $this->mInterwikiMagic, $x );
	return $result;
}
# Assign mAllowExternalImages through wfSetVar() and hand back whatever it
# returns (presumably the previous value — confirm against wfSetVar).
function setAllowExternalImages( $x ) {
	$result = wfSetVar( $this->mAllowExternalImages, $x );
	return $result;
}
# Assign mDateFormat through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setDateFormat( $x ) {
	$result = wfSetVar( $this->mDateFormat, $x );
	return $result;
}
# Assign mEditSection through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setEditSection( $x ) {
	$result = wfSetVar( $this->mEditSection, $x );
	return $result;
}
# Assign mEditSectionOnRightClick through wfSetVar() and hand back whatever it
# returns (presumably the previous value — confirm against wfSetVar).
function setEditSectionOnRightClick( $x ) {
	$result = wfSetVar( $this->mEditSectionOnRightClick, $x );
	return $result;
}
# Assign mNumberHeadings through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setNumberHeadings( $x ) {
	$result = wfSetVar( $this->mNumberHeadings, $x );
	return $result;
}
# Assign mShowToc through wfSetVar() and hand back whatever it returns
# (presumably the previous value — confirm against wfSetVar).
function setShowToc( $x ) {
	$result = wfSetVar( $this->mShowToc, $x );
	return $result;
}
# Bind mSkin to the caller's skin by reference (no copy is made).
# Unlike the other setters this one returns nothing.
function setSkin( &$x ) {
	$this->mSkin =& $x;
}
2430 # Get parser options
# Build a fresh ParserOptions object and initialise it from $user.
# Marked static by convention only (the keyword is commented out).
2431 /* static */ function newFromUser( &$user ) {
2432 $popts = new ParserOptions
;
# All option fields are filled in by initialiseFromUser().
2433 $popts->initialiseFromUser( $user );
# NOTE(review): $popts is presumably returned on a line not visible here — confirm.
# Fill every option field from site-wide globals and from a user's preferences.
# $userInput: user object, taken by reference; a falsy value takes the fallback branch.
2438 function initialiseFromUser( &$userInput ) {
2439 global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
# Profiling section name; matched by the wfProfileOut() at the end.
2441 $fname = 'ParserOptions::initialiseFromUser';
2442 wfProfileIn( $fname );
# Fallback when no user was passed in.
# NOTE(review): the line that creates $user for this branch is not visible here — confirm.
2443 if ( !$userInput ) {
2445 $user->setLoaded( true );
# Presumably the else branch: alias the caller's user object instead of copying it.
2447 $user =& $userInput;
# Site-wide switches are copied straight from the configuration globals.
2450 $this->mUseTeX
= $wgUseTeX;
2451 $this->mUseDynamicDates
= $wgUseDynamicDates;
2452 $this->mInterwikiMagic
= $wgInterwikiMagic;
2453 $this->mAllowExternalImages
= $wgAllowExternalImages;
# The skin lookup is profiled on its own, under the name suffixed with '-skin'.
2454 wfProfileIn( $fname.'-skin' );
2455 $this->mSkin
=& $user->getSkin();
2456 wfProfileOut( $fname.'-skin' );
# The remaining fields are per-user preferences read through getOption().
2457 $this->mDateFormat
= $user->getOption( 'date' );
2458 $this->mEditSection
= $user->getOption( 'editsection' );
2459 $this->mEditSectionOnRightClick
= $user->getOption( 'editsectiononrightclick' );
2460 $this->mNumberHeadings
= $user->getOption( 'numberheadings' );
2461 $this->mShowToc
= $user->getOption( 'showtoc' );
2462 wfProfileOut( $fname );
2468 # Regex callbacks, used in Parser::replaceVariables
# Plain-function shim so a regex callback can reach the current parser:
# passes the match array to $wgCurParser->braceSubstitution() and returns
# its result unchanged.
function wfBraceSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
# Plain-function shim so a regex callback can reach the current parser:
# passes the match array to $wgCurParser->argSubstitution() and returns
# its result unchanged.
function wfArgSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
# Plain-function shim so a regex callback can reach the current parser:
# passes the match array to $wgCurParser->variableSubstitution() and returns
# its result unchanged.
function wfVariableSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2485 * Return the total number of articles
# Hand back the value currently held in the global $wgNumberOfArticles.
2487 function wfNumberOfArticles() {
2488 global $wgNumberOfArticles;
# NOTE(review): a call that loads the site statistics presumably belongs here
# but is not visible in this view — confirm.
2491 return $wgNumberOfArticles;
2495 * Get various statistics from the database
# Load view, edit and article counters from the site_stats table into globals.
2498 function wfLoadSiteStats() {
2499 global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
2500 $fname = 'wfLoadSiteStats';
# Only -1 lets the load proceed; any other value returns immediately.
2502 if ( -1 != $wgNumberOfArticles ) return;
# Fetch the three counters from the row with ss_row_id = 1, using the DB_SLAVE handle.
2503 $dbr =& wfGetDB( DB_SLAVE
);
2504 $s = $dbr->getArray( 'site_stats',
2505 array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
2506 array( 'ss_row_id' => 1 ), $fname
# NOTE(review): what happens when the lookup fails ($s === false) is not visible here — confirm.
2509 if ( $s === false ) {
# Publish the fetched counters through the globals.
2512 $wgTotalViews = $s->ss_total_views
;
2513 $wgTotalEdits = $s->ss_total_edits
;
2514 $wgNumberOfArticles = $s->ss_good_articles
;
2518 function wfEscapeHTMLTagsOnly( $in ) {
2520 array( '"', '>', '<' ),
2521 array( '"', '>', '<' ),