3 // require_once('Tokenizer.php');
8 * Processes wiki markup
10 * There are two main entry points into the Parser class:
12 * produces HTML output
14 * produces altered wiki markup.
17 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
19 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
22 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
23 * $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 * * only within ParserOptions
32 * Variable substitution O(N^2) attack
34 * Without countermeasures, it would be possible to attack the parser by saving
35 * a page filled with a large number of inclusions of large pages. The size of
36 * the generated page would be proportional to the square of the input size.
37 * Hence, we limit the number of inclusions of any given page, thus bringing any
38 * attack back to O(N).
# Limits on template inclusion; presumably the countermeasure for the
# O(N^2) attack described above -- TODO confirm where they are enforced.
40 define( 'MAX_INCLUDE_REPEAT', 100 );
41 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
43 # Allowed values for $mOutputType
44 define( 'OT_HTML', 1 );
45 define( 'OT_WIKI', 2 );
46 define( 'OT_MSG' , 3 );
48 # string parameter for extractTags which will cause it
49 # to strip HTML comments in addition to regular
50 # <XML>-style tags. This should not be anything we
51 # may want to use in wikisyntax
52 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
54 # prefix for escaping, used in two functions at least
55 define( 'UNIQ_PREFIX', 'NaodW29');
57 # Constants needed for external link processing
58 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
59 define( 'HTTP_PROTOCOLS', 'http|https' );
60 # Everything except bracket, space, or control characters
61 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
# Exact complement of EXT_LINK_URL_CLASS
62 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Link text: anything except a closing bracket or control characters (space allowed)
64 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
65 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
66 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed external link. Captures: 1 = full URL, 2 = protocol, 3 = link text
67 define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS
.'):'.EXT_LINK_URL_CLASS
.'+) *('.EXT_LINK_TEXT_CLASS
.'*?)\]/S' );
# Whole-string match for an http(s) image URL; the extension is matched case-insensitively
68 define( 'EXT_IMAGE_REGEX',
69 '/^('.HTTP_PROTOCOLS
.':)'. # Protocol
70 '('.EXT_LINK_URL_CLASS
.'+)\\/'. # Hostname and path
71 '('.EXT_IMAGE_FNAME_CLASS
.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS
.')$/S' # Filename
83 # Cleared with clearState():
84 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
85 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# The members below are not reset by clearState(); parse() assigns
# mOptions, mTitle and mOutputType.
88 var $mOptions, $mTitle, $mOutputType,
89 $mTemplates, // cache of already loaded templates, avoids
90 // multiple SQL queries for the same string
91 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
92 // in this path. Used for loop detection.
95 $this->mTemplates
= array();
96 $this->mTemplatePath
= array();
97 $this->mTagHooks
= array();
# Reset the per-parse state: a fresh ParserOutput, autonumber counter at 0,
# no open section / <dt> / <pre>, and empty include counts, strip state
# and argument stack. mVariables is set to false.
101 function clearState() {
102 $this->mOutput
= new ParserOutput
;
103 $this->mAutonumber
= 0;
104 $this->mLastSection
= "";
105 $this->mDTopen
= false;
106 $this->mVariables
= false;
107 $this->mIncludeCount
= array();
108 $this->mStripState
= array();
109 $this->mArgStack
= array();
110 $this->mInPre
= false;
113 # First pass--just handle <nowiki> sections, pass the rest off
114 # to internalParse() which does all the real work.
116 # Returns a ParserOutput
# Main entry point for HTML output.
#   $text       wiki markup to convert
#   $title      stored by reference in $this->mTitle
#   $options    stored in $this->mOptions
#   $linestart  forwarded to internalParse() and doBlockLevels()
#   $clearState not referenced in the lines visible here; presumably triggers
#               clearState() -- TODO confirm
# Pipeline: strip() -> internalParse() -> unstrip() -> tag/entity clean-up
# -> doBlockLevels() -> unstripNoWiki() -> tidy(); the result is stored in
# $this->mOutput via setText() and that object is returned.
118 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
120 $fname = 'Parser::parse';
121 wfProfileIn( $fname );
127 $this->mOptions
= $options;
128 $this->mTitle
=& $title;
129 $this->mOutputType
= OT_HTML
;
132 $text = $this->strip( $text, $this->mStripState
);
133 $text = $this->internalParse( $text, $linestart );
134 $text = $this->unstrip( $text, $this->mStripState
);
135 # Clean up special characters, only run once, next-to-last before doBlockLevels
138 # french spaces, last one Guillemet-left
139 # only if there is something before the space
# The punctuation is only looked ahead at (not consumed, not captured), so
# the replacement is just the preceding character plus a non-breaking space.
140 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
141 # french spaces, Guillemet-right
142 "/(\\302\\253) /i"=>"\\1&nbsp;",
143 '/<hr *>/i' => '<hr />',
144 '/<br *>/i' => '<br />',
145 '/<center *>/i' => '<div class="center">',
146 '/<\\/center *>/i' => '</div>',
147 # Clean up spare ampersands; note that we probably ought to be
148 # more careful about named entities.
# Any '&' that does not start a named, decimal or hex entity is escaped.
149 '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
151 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
154 # french spaces, last one Guillemet-left
155 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
156 # french spaces, Guillemet-right
157 '/(\\302\\253) /i' => '\\1&nbsp;',
158 '/<center *>/i' => '<div class="center">',
159 '/<\\/center *>/i' => '</div>'
161 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
164 $text = $this->doBlockLevels( $text, $linestart );
165 $text = $this->unstripNoWiki( $text, $this->mStripState
);
167 $text = $this->tidy($text);
169 $this->mOutput
->setText( $text );
170 wfProfileOut( $fname );
171 return $this->mOutput
;
# Returns a string made of two mt_rand() values (each in 0..0x7fffffff)
# rendered as lower-case hex and concatenated; used to build strip markers.
174 /* static */ function getRandomString() {
175 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
178 # Replaces all occurrences of <$tag>content</$tag> in the text
179 # with a random marker and returns the new text. the output parameter
180 # $content will be an associative array filled with data on the form
181 # $unique_marker => content.
183 # If $content is already set, the additional entries will be appended
185 # If $tag is set to STRIP_COMMENTS, the function will extract
186 # <!-- HTML comments -->
# Repeatedly splits $text at the next opening tag and then at the matching
# closing tag (or at <!-- and --> when $tag is STRIP_COMMENTS). The enclosed
# text is stored in $content under a generated marker and the marker is
# appended to $stripped. Tag matching is case-insensitive.
188 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ''){
# Marker stem: caller prefix, '-', tag name, then a random hex string
189 $rnd = $uniq_prefix . '-' . $tag . Parser
::getRandomString();
196 while ( '' != $text ) {
197 if($tag==STRIP_COMMENTS
) {
198 $p = preg_split( '/<!--/i', $text, 2 );
200 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
# No opening tag found, or nothing follows it
203 if ( ( count( $p ) < 2 ) ||
( '' == $p[1] ) ) {
206 if($tag==STRIP_COMMENTS
) {
207 $q = preg_split( '/-->/i', $p[1], 2 );
209 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
# Marker = stem + 8-digit upper-case hex sequence number
211 $marker = $rnd . sprintf('%08X', $n++
);
212 $content[$marker] = $q[0];
213 $stripped .= $marker;
220 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
221 # If $render is set, performs necessary rendering operations on plugins
222 # Returns the text, and fills an array with data needed in unstrip()
223 # If the $state is already a valid strip state, it adds to the state
225 # When $stripcomments is set, HTML comments <!-- like this -->
226 # will be stripped in addition to other tags. This is important
227 # for section editing, where these comments cause confusion when
228 # counting the sections in the wikisource
# Pulls <html> (only when $wgRawHtml and $wgWhitelistEdit are set), <nowiki>,
# <math>, <pre>, HTML comments and every tag registered in $this->mTagHooks
# out of $text via extractTags(), replacing them with unique markers.
# The extracted content is either rendered or re-wrapped in its original tag
# and then merged into $state (or becomes the new $state) for unstrip() /
# unstripNoWiki().
#   $text           wikitext to process
#   $state          strip state, passed by reference and extended
#   $stripcomments  see the comment block above; not referenced in the lines
#                   visible here
229 function strip( $text, &$state, $stripcomments = false ) {
# Rendering only happens when producing HTML output
230 $render = ($this->mOutputType
== OT_HTML
);
231 $html_content = array();
232 $nowiki_content = array();
233 $math_content = array();
234 $pre_content = array();
235 $comment_content = array();
236 $ext_content = array();
238 # Replace any instances of the placeholders
239 $uniq_prefix = UNIQ_PREFIX
;
240 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
243 global $wgRawHtml, $wgWhitelistEdit;
244 if( $wgRawHtml && $wgWhitelistEdit ) {
245 $text = Parser
::extractTags('html', $text, $html_content, $uniq_prefix);
246 foreach( $html_content as $marker => $content ) {
248 # Raw and unchecked for validity.
249 $html_content[$marker] = $content;
251 $html_content[$marker] = '<html>'.$content.'</html>';
257 $text = Parser
::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
258 foreach( $nowiki_content as $marker => $content ) {
260 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
262 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
267 $text = Parser
::extractTags('math', $text, $math_content, $uniq_prefix);
268 foreach( $math_content as $marker => $content ){
270 if( $this->mOptions
->getUseTeX() ) {
271 $math_content[$marker] = renderMath( $content );
# Re-wrap with a proper closing tag (the closing tag used to lack its slash)
273 $math_content[$marker] = '<math>'.$content.'</math>';
276 $math_content[$marker] = '<math>'.$content.'</math>';
281 $text = Parser
::extractTags('pre', $text, $pre_content, $uniq_prefix);
282 foreach( $pre_content as $marker => $content ){
284 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
286 $pre_content[$marker] = '<pre>'.$content.'</pre>';
292 $text = Parser
::extractTags(STRIP_COMMENTS
, $text, $comment_content, $uniq_prefix);
293 foreach( $comment_content as $marker => $content ){
294 $comment_content[$marker] = '<!--'.$content.'-->';
299 foreach ( $this->mTagHooks
as $tag => $callback ) {
# Initialise the per-tag bucket that extractTags() fills by reference
300 $ext_content[$tag] = array();
301 $text = Parser
::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
302 foreach( $ext_content[$tag] as $marker => $content ) {
304 $ext_content[$tag][$marker] = $callback( $content );
306 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
311 # Merge state with the pre-existing state, if there is one
# Array union (+): entries already present in $state win on key collision
313 $state['html'] = $state['html'] +
$html_content;
314 $state['nowiki'] = $state['nowiki'] +
$nowiki_content;
315 $state['math'] = $state['math'] +
$math_content;
316 $state['pre'] = $state['pre'] +
$pre_content;
317 $state['comment'] = $state['comment'] +
$comment_content;
319 foreach( $ext_content as $tag => $array ) {
320 if ( array_key_exists( $tag, $state ) ) {
321 $state[$tag] = $state[$tag] +
$array;
326 'html' => $html_content,
327 'nowiki' => $nowiki_content,
328 'math' => $math_content,
329 'pre' => $pre_content,
330 'comment' => $comment_content,
336 # always call unstripNoWiki() after this one
# Replaces every marker stored in $state with its saved content, walking
# both the groups and the entries inside each group from last to first.
# The 'nowiki' and 'html' groups are skipped here; unstripNoWiki() expands them.
337 function unstrip( $text, &$state ) {
338 # Must expand in reverse order, otherwise nested tags will be corrupted
339 $contentDict = end( $state );
340 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
341 if( key($state) != 'nowiki' && key($state) != 'html') {
342 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
# key() is the marker, $content the text that replaces it
343 $text = str_replace( key( $contentDict ), $content, $text );
350 # always call this after unstrip() to preserve the order
# Expands the markers of the 'nowiki' group and then of the 'html' group,
# each from the last stored entry to the first.
351 function unstripNoWiki( $text, &$state ) {
352 # Must expand in reverse order, otherwise nested tags will be corrupted
353 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
354 $text = str_replace( key( $state['nowiki'] ), $content, $text );
359 for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
360 $text = str_replace( key( $state['html'] ), $content, $text );
367 # Add an item to the strip state
368 # Returns the unique tag which must be inserted into the stripped text
369 # The tag will be replaced with the original text in unstrip()
# Stores $text in the 'item' group of $state under a freshly generated
# marker (UNIQ_PREFIX . '-item' . random hex string).
370 function insertStripItem( $text, &$state ) {
371 $rnd = UNIQ_PREFIX
. '-item' . Parser
::getRandomString();
380 $state['item'][$rnd] = $text;
384 # Return allowed HTML attributes
# Builds the whitelist of attribute names that fixTagAttributes() keeps.
# Note: 'width' is listed twice; the duplicate is harmless for an
# in_array() membership test.
385 function getHTMLattrs () {
386 $htmlattrs = array( # Allowed attributes--no scripting, etc.
387 'title', 'align', 'lang', 'dir', 'width', 'height',
388 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
389 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
390 /* FONT */ 'type', 'start', 'value', 'compact',
391 /* For various lists, mostly deprecated but safe */
392 'summary', 'width', 'border', 'frame', 'rules',
393 'cellspacing', 'cellpadding', 'valign', 'char',
394 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
395 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
396 'id', 'class', 'name', 'style' /* For CSS */
401 # Remove non approved attributes and javascript in css
# Filters the attribute string $t of an HTML tag: attributes whose
# lower-cased name is not in getHTMLattrs() are removed, and the result is
# then checked for suspicious content in a style attribute.
# An all-whitespace $t yields ''.
402 function fixTagAttributes ( $t ) {
403 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
404 $htmlattrs = $this->getHTMLattrs() ;
406 # Strip non-approved attributes from the tag
# Captures: 1 = attribute name, 3 = value (bare or double-quoted).
# NOTE(review): the pattern relies on the /e (eval) modifier, which newer
# PHP versions no longer support -- confirm the target PHP version.
408 '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
409 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
412 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
414 # Strip javascript "expression" from stylesheets. Brute force approach:
415 # If anything offensive is found, all attributes of the HTML tag are dropped
# Looks for "expression", something ending in "tp"/"tps" followed by "://",
# or "url(" anywhere after "style="
418 '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
419 wfMungeToUtf8( $t ) ) )
427 # interface with html tidy, used if $wgUseTidy = true
# Pipes $text, wrapped in a minimal XHTML 1.0 Transitional document, through
# the external tidy binary ($wgTidyBin, configured by $wgTidyConf and
# $wgTidyOpts) and collects its standard output in $cleansource.
# An encoding flag is chosen from $wgOutputEncoding / $wgInputEncoding.
# If tidy produced nothing for a non-empty input, the original text is
# returned with an HTML comment reporting the failure appended.
428 function tidy ( $text ) {
429 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
430 global $wgInputEncoding, $wgOutputEncoding;
431 $fname = 'Parser::tidy';
432 wfProfileIn( $fname );
# Build the option string in a local copy: appending to the global itself
# made it grow by one more encoding flag on every call.
$opts = $wgTidyOpts;
435 switch(strtoupper($wgOutputEncoding)) {
437 $opts .= ($wgInputEncoding == $wgOutputEncoding)?
' -latin1':' -raw';
440 $opts .= ($wgInputEncoding == $wgOutputEncoding)?
' -utf8':' -raw';
443 $opts .= ' -raw';
446 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
447 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
448 '<head><title>test</title></head><body>'.$text.'</body></html>';
# stdin and stdout are pipes; stderr is discarded
449 $descriptorspec = array(
450 0 => array('pipe', 'r'),
451 1 => array('pipe', 'w'),
452 2 => array('file', '/dev/null', 'a')
454 $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
455 if (is_resource($process)) {
456 fwrite($pipes[0], $wrappedtext);
458 while (!feof($pipes[1])) {
459 $cleansource .= fgets($pipes[1], 1024);
462 $return_value = proc_close($process);
465 wfProfileOut( $fname );
467 if( $cleansource == '' && $text != '') {
468 wfDebug( "Tidy error detected!\n" );
469 return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
475 # parse the wiki syntax used to render tables
# Converts wiki table markup to HTML, one input line at a time:
#   {|  (optionally preceded by ':' indentation) opens a table
#   |}  closes the current table
#   |-  starts a new row (any number of dashes); the rest is row attributes
#   |   data cell,  !  header cell,  |+  caption
# Four parallel stacks track, per nesting level, whether a cell / row is
# open, which cell tag is open and the pending row attributes.
476 function doTableStuff ( $t ) {
477 $fname = 'Parser::doTableStuff';
478 wfProfileIn( $fname );
480 $t = explode ( "\n" , $t ) ;
481 $td = array () ; # Is currently a td tag open?
482 $ltd = array () ; # Was it TD or TH?
483 $tr = array () ; # Is currently a tr tag open?
484 $ltr = array () ; # tr attributes
485 $indent_level = 0; # indent level of the table
486 foreach ( $t AS $k => $x )
489 $fc = substr ( $x , 0 , 1 ) ;
490 if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
# Each leading ':' becomes one <dl><dd> wrapper around the table
491 $indent_level = strlen( $matches[1] );
493 str_repeat( '<dl><dd>', $indent_level ) .
494 '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
495 array_push ( $td , false ) ;
496 array_push ( $ltd , '' ) ;
497 array_push ( $tr , false ) ;
498 array_push ( $ltr , '' ) ;
500 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
501 else if ( '|}' == substr ( $x , 0 , 2 ) ) {
503 $l = array_pop ( $ltd ) ;
504 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
505 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
507 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
509 else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
510 $x = substr ( $x , 1 ) ;
511 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
513 $l = array_pop ( $ltd ) ;
514 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
515 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
518 array_push ( $tr , false ) ;
519 array_push ( $td , false ) ;
520 array_push ( $ltd , '' ) ;
521 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
523 else if ( '|' == $fc ||
'!' == $fc ||
'|+' == substr ( $x , 0 , 2 ) ) { # Caption
524 if ( '|+' == substr ( $x , 0 , 2 ) ) {
526 $x = substr ( $x , 1 ) ;
528 $after = substr ( $x , 1 ) ;
# In header lines '!!' separates cells just like '||'
529 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
530 $after = explode ( '||' , $after ) ;
532 foreach ( $after AS $theline )
537 $tra = array_pop ( $ltr ) ;
538 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
539 array_push ( $tr , true ) ;
540 array_push ( $ltr , '' ) ;
543 $l = array_pop ( $ltd ) ;
544 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
545 if ( $fc == '|' ) $l = 'td' ;
546 else if ( $fc == '!' ) $l = 'th' ;
547 else if ( $fc == '+' ) $l = 'caption' ;
549 array_push ( $ltd , $l ) ;
# "attributes|content": a single '|' inside a cell separates attributes from text
550 $y = explode ( '|' , $theline , 2 ) ;
551 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
552 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
554 array_push ( $td , true ) ;
559 # Closing open td, tr && table
560 while ( count ( $td ) > 0 )
# Close with the tag that is actually open (td, th or caption), as the
# '|}' and '|-' branches do, instead of always emitting </td>.
$l = array_pop ( $ltd ) ;
562 if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
563 if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
567 $t = implode ( "\n" , $t ) ;
568 # $t = $this->removeHTMLtags( $t );
569 wfProfileOut( $fname );
573 # Parses the text and adds the result to the strip state
574 # Returns the strip tag
# Runs strip() and internalParse() (with $isMain = false) on $text, stores
# the result as a strip item in $this->mStripState, and returns $newline
# followed by the item's marker. $newline also decides, cast to bool,
# whether the text is parsed as starting on a new line.
575 function stripParse( $text, $newline, $args ) {
576 $text = $this->strip( $text, $this->mStripState
);
577 $text = $this->internalParse( $text, (bool)$newline, $args, false );
578 return $newline.$this->insertStripItem( $text, $this->mStripState
);
# Core wikitext-to-HTML pass, applied in a fixed order: HTML tag clean-up,
# variable substitution, horizontal rules, headings, optional date
# reformatting, quotes, external / magic / internal links, unstripping,
# tables, heading formatting and finally the skin's transformContent().
#   $text       already-stripped wikitext
#   $linestart  not referenced in the lines visible here
#   $args       forwarded to replaceVariables()
#   $isMain     forwarded to formatHeadings()
581 function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
582 global $wgLanguageCode, $wgLang;
584 $fname = 'Parser::internalParse';
585 wfProfileIn( $fname );
587 $text = $this->removeHTMLtags( $text );
588 $text = $this->replaceVariables( $text, $args );
# Language conversion is only applied when the site language is "zh"
590 if($wgLanguageCode == "zh") {
591 $text = $wgLang->convert($text);
# Four or more dashes at the start of a line become a horizontal rule
594 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
596 $text = $this->doHeadings( $text );
597 if($this->mOptions
->getUseDynamicDates()) {
598 global $wgDateFormatter;
599 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
601 $text = $this->doAllQuotes( $text );
602 $text = $this->replaceExternalLinks( $text );
603 $text = $this->doMagicLinks( $text );
# NOTE(review): replaceInternalLinks() runs twice in a row -- possibly to
# resolve links nested inside other link text; confirm before removing.
604 $text = $this->replaceInternalLinks ( $text );
605 $text = $this->replaceInternalLinks ( $text );
607 $text = $this->unstrip( $text, $this->mStripState
);
608 $text = $this->unstripNoWiki( $text, $this->mStripState
);
610 $text = $this->doTableStuff( $text );
611 $text = $this->formatHeadings( $text, $isMain );
612 $sk =& $this->mOptions
->getSkin();
613 $text = $sk->transformContent( $text );
615 wfProfileOut( $fname );
# Applies the "magic" link handlers to $text in order: ISBN, then GEO
# (only when $wgUseGeoMode is set and true), then RFC.
# $text is taken by reference and overwritten with each handler's result.
619 /* private */ function &doMagicLinks( &$text ) {
620 global $wgUseGeoMode;
621 $text = $this->magicISBN( $text );
622 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
623 $text = $this->magicGEO( $text );
625 $text = $this->magicRFC( $text );
629 # Parse ^^ tokens and return html
# Wraps text enclosed in ^^ ... ^^ in <small><sup>. The inner match is
# greedy, so on one line it spans from the first ^^ to the last ^^.
630 /* private */ function doExponent ( $text ) {
631 $fname = 'Parser::doExponent';
632 wfProfileIn( $fname);
633 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
634 wfProfileOut( $fname);
638 # Parse headers and return html
# Turns lines of the form "==title==" into <hN> elements. Levels are
# processed from 6 down to 1 so that longer runs of '=' are matched before
# shorter ones can consume them.
639 /* private */ function doHeadings( $text ) {
640 $fname = 'Parser::doHeadings';
641 wfProfileIn( $fname );
642 for ( $i = 6; $i >= 1; --$i ) {
# $h is a run of $i equals signs
643 $h = substr( '======', 0, $i );
644 $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
645 "<h{$i}>\\1</h{$i}>\\2", $text );
647 wfProfileOut( $fname );
# Applies doQuotes() to every line of $text separately and re-joins the
# lines with "\n"; the trailing newline added by the loop is cut off again.
651 /* private */ function doAllQuotes( $text ) {
652 $fname = 'Parser::doAllQuotes';
653 wfProfileIn( $fname );
655 $lines = explode( "\n", $text );
656 foreach ( $lines as $line ) {
657 $outtext .= $this->doQuotes ( $line ) . "\n";
659 $outtext = substr($outtext, 0,-1);
660 wfProfileOut( $fname );
# Converts apostrophe mark-up ('' italics, ''' bold, ''''' both) in a single
# line to <i>/<b> tags.
# The line is split on runs of two or more apostrophes with the runs kept,
# so odd indexes of $arr are apostrophe runs and even indexes are text.
# Pass 1 normalises run lengths and counts bold/italic tokens, pass 2
# rebalances an odd bold + odd italic count, pass 3 emits HTML with a small
# state machine ($state is one of '', 'i', 'b', 'bi', 'ib', 'both').
664 /* private */ function doQuotes( $text ) {
665 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
666 if (count ($arr) == 1)
670 # First, do some preliminary work. This may shift some apostrophes from
671 # being mark-up to being text. It also counts the number of occurrences
672 # of bold and italics mark-ups.
680 # If there are ever four apostrophes, assume the first is supposed to
681 # be text, and the remaining three constitute mark-up for bold text.
682 if (strlen ($arr[$i]) == 4)
687 # If there are more than 5 apostrophes in a row, assume they're all
688 # text except for the last 5.
689 else if (strlen ($arr[$i]) > 5)
691 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
694 # Count the number of occurrences of bold and italics mark-ups.
695 # We are not counting sequences of five apostrophes.
696 if (strlen ($arr[$i]) == 2) $numitalics++
; else
697 if (strlen ($arr[$i]) == 3) $numbold++
; else
698 if (strlen ($arr[$i]) == 5) { $numitalics++
; $numbold++
; }
703 # If there is an odd number of both bold and italics, it is likely
704 # that one of the bold ones was meant to be an apostrophe followed
705 # by italics. Which one we cannot know for certain, but it is more
706 # likely to be one that has a single-letter word before it.
707 if (($numbold %
2 == 1) && ($numitalics %
2 == 1))
710 $firstsingleletterword = -1;
711 $firstmultiletterword = -1;
715 if (($i %
2 == 1) and (strlen ($r) == 3))
# $x1 / $x2: last and second-to-last character of the text before this run
717 $x1 = substr ($arr[$i-1], -1);
718 $x2 = substr ($arr[$i-1], -2, 1);
720 if ($firstspace == -1) $firstspace = $i;
721 } else if ($x2 == ' ') {
722 if ($firstsingleletterword == -1) $firstsingleletterword = $i;
724 if ($firstmultiletterword == -1) $firstmultiletterword = $i;
730 # If there is a single-letter word, use it!
731 if ($firstsingleletterword > -1)
# Demote the bold token to italics and give one apostrophe back to the text
733 $arr [ $firstsingleletterword ] = "''";
734 $arr [ $firstsingleletterword-1 ] .= "'";
736 # If not, but there's a multi-letter word, use that one.
737 else if ($firstmultiletterword > -1)
739 $arr [ $firstmultiletterword ] = "''";
740 $arr [ $firstmultiletterword-1 ] .= "'";
742 # ... otherwise use the first one that has neither.
743 # (notice that it is possible for all three to be -1 if, for example,
744 # there is only one quintuple-apostrophe in the line)
745 else if ($firstspace > -1)
747 $arr [ $firstspace ] = "''";
748 $arr [ $firstspace-1 ] .= "'";
752 # Now let's actually convert our apostrophic mush to HTML!
# In state 'both' (opened by ''''') text is held in $buffer until the next
# token decides which of <b>/<i> has to be the outer tag.
761 if ($state == 'both')
768 if (strlen ($r) == 2)
771 { $output .= '</i>'; $state = ''; }
772 else if ($state == 'bi')
773 { $output .= '</i>'; $state = 'b'; }
774 else if ($state == 'ib')
775 { $output .= '</b></i><b>'; $state = 'b'; }
776 else if ($state == 'both')
777 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
778 else # $state can be 'b' or ''
779 { $output .= '<i>'; $state .= 'i'; }
781 else if (strlen ($r) == 3)
784 { $output .= '</b>'; $state = ''; }
785 else if ($state == 'bi')
786 { $output .= '</i></b><i>'; $state = 'i'; }
787 else if ($state == 'ib')
788 { $output .= '</b>'; $state = 'i'; }
789 else if ($state == 'both')
790 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
791 else # $state can be 'i' or ''
792 { $output .= '<b>'; $state .= 'b'; }
794 else if (strlen ($r) == 5)
797 { $output .= '</b><i>'; $state = 'i'; }
798 else if ($state == 'i')
799 { $output .= '</i><b>'; $state = 'b'; }
800 else if ($state == 'bi')
801 { $output .= '</i></b>'; $state = ''; }
802 else if ($state == 'ib')
803 { $output .= '</b></i>'; $state = ''; }
804 else if ($state == 'both')
805 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
806 else # ($state == '')
807 { $buffer = ''; $state = 'both'; }
812 # Now close all remaining tags. Notice that the order is important.
813 if ($state == 'b' ||
$state == 'ib')
815 if ($state == 'i' ||
$state == 'bi' ||
$state == 'ib')
819 if ($state == 'both')
820 $output .= '<b><i>'.$buffer.'</i></b>';
825 # Note: we have to do external links before the internal ones,
826 # and otherwise take great care in the order of things here, so
827 # that we don't end up interpreting some URLs twice.
# Replaces bracketed external links ([url] and [url text]) with <a> tags.
# The text is split on EXT_LINK_BRACKETED with the captures kept, so $bits
# holds, per link, the captured pieces followed by the text up to the next
# link. Text outside bracketed links is passed through
# replaceFreeExternalLinks().
# Links without text are auto-numbered for protocols listed in
# HTTP_PROTOCOLS and show the escaped URL otherwise. Unless the skin
# suppresses it, a "urlexpansion" span with the URL is appended for the
# printable version.
829 /* private */ function replaceExternalLinks( $text ) {
830 $fname = 'Parser::replaceExternalLinks';
831 wfProfileIn( $fname );
833 $sk =& $this->mOptions
->getSkin();
834 $linktrail = wfMsg('linktrail');
835 $bits = preg_split( EXT_LINK_BRACKETED
, $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
837 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
840 while ( $i<count( $bits ) ) {
842 $protocol = $bits[$i++
];
844 $trail = $bits[$i++
];
846 # If the link text is an image URL, replace it with an <img> tag
847 # This happened by accident in the original parser, but some people used it extensively
848 $img = $this->maybeMakeImageLink( $text );
849 if ( $img !== false ) {
855 # No link text, e.g. [http://domain.tld/some.link]
857 # Autonumber if allowed
858 if ( strpos( HTTP_PROTOCOLS
, $protocol ) !== false ) {
859 $text = '[' . ++
$this->mAutonumber
. ']';
861 # Otherwise just use the URL
862 $text = htmlspecialchars( $url );
865 # Have link text, e.g. [http://domain.tld/some.link text]s
867 if ( preg_match( $linktrail, $trail, $m2 ) ) {
873 $encUrl = htmlspecialchars( $url );
874 # Bit in parentheses showing the URL for the printable version
# The pattern is delimited by '!', so that is the delimiter preg_quote()
# has to escape inside the link text.
875 if( $url == $text ||
preg_match( "!$protocol://" . preg_quote( $text, '!' ) . "/?$!", $url ) ) {
878 # Expand the URL for printable version
879 if ( ! $sk->suppressUrlExpansion() ) {
# $encUrl is already HTML-escaped; escaping it a second time would show
# '&amp;' literally in the expansion.
880 $paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
886 # Process the trail (i.e. everything after this link up until start of the next link),
887 # replacing any non-bracketed links
888 $trail = $this->replaceFreeExternalLinks( $trail );
890 $la = $sk->getExternalLinkAttributes( $url, $text );
892 # Use the encoded URL
893 # This means that users can paste URLs directly into the text
894 # Funny characters like ö aren't valid in URLs anyway
895 # This was changed in August 2004
# NOTE(review): href is built from $url, not $encUrl; whether $url is
# already safe for an attribute value cannot be told from here -- confirm.
896 $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
899 wfProfileOut( $fname );
903 # Replace anything that looks like a URL with a link
# Turns bare URLs in $text into links (or images, see maybeMakeImageLink()).
# The text is split on "<protocol>:" with the protocol kept, so after the
# leading chunk $bits alternates between a protocol and the text after it.
# Trailing separator characters are moved out of the URL into $trail;
# a closing parenthesis counts as a separator only when the URL contains
# no opening one.
# Chunks that do not look like a URL are appended unchanged.
904 function replaceFreeExternalLinks( $text ) {
905 $bits = preg_split( '/((?:'.URL_PROTOCOLS
.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE
);
906 $s = array_shift( $bits );
909 $sk =& $this->mOptions
->getSkin();
911 while ( $i < count( $bits ) ){
912 $protocol = $bits[$i++
];
913 $remainder = $bits[$i++
];
915 if ( preg_match( '/^('.EXT_LINK_URL_CLASS
.'+)(.*)$/s', $remainder, $m ) ) {
916 # Found some characters after the protocol that look promising
917 $url = $protocol . $m[1];
920 # Move trailing punctuation to $trail
922 # If there is no left bracket, then consider right brackets fair game too
923 if ( strpos( $url, '(' ) === false ) {
# Count separator characters at the end of the URL
927 $numSepChars = strspn( strrev( $url ), $sep );
928 if ( $numSepChars ) {
929 $trail = substr( $url, -$numSepChars ) . $trail;
930 $url = substr( $url, 0, -$numSepChars );
933 # Replace &amp; from obsolete syntax with &
934 $url = str_replace( '&amp;', '&', $url );
936 # Is this an external image?
937 $text = $this->maybeMakeImageLink( $url );
938 if ( $text === false ) {
939 # Not an image, make a link
940 $text = $sk->makeExternalLink( $url, $url );
942 $s .= $text . $trail;
944 $s .= $protocol . $remainder;
950 # make an image if it's allowed
# When the parser options allow external images and $url matches
# EXT_IMAGE_REGEX, builds the image mark-up through the skin's makeImage()
# using the HTML-escaped URL. Callers compare the result against false to
# detect "not an image".
951 function maybeMakeImageLink( $url ) {
952 $sk =& $this->mOptions
->getSkin();
954 if ( $this->mOptions
->getAllowExternalImages() ) {
955 if ( preg_match( EXT_IMAGE_REGEX
, $url ) ) {
957 $text = $sk->makeImage( htmlspecialchars( $url ) );
963 # The wikilinks [[ ]] are processed here.
# Replaces [[...]] links in $s. The text is split on '[[' and every chunk
# is matched against $e1 (target, optional |text, closing ]], trail).
# Depending on the target this produces an interlanguage link, an image,
# a category link, a bold self-link, a media or special-page link, or a
# normal link built by the skin. Chunks that do not match are written back
# unchanged with their '[['.
964 /* private */ function replaceInternalLinks( $s ) {
965 global $wgLang, $wgLinkCache;
966 global $wgNamespacesWithSubpages, $wgLanguageCode;
967 static $fname = 'Parser::replaceInternalLinks' ;
968 wfProfileIn( $fname );
970 wfProfileIn( $fname.'-setup' );
972 # the % is needed to support urlencoded titles as well
973 if ( !$tc ) { $tc = Title
::legalChars() . '#%'; }
974 $sk =& $this->mOptions
->getSkin();
976 $redirect = MagicWord
::get ( MAG_REDIRECT
) ;
# A space is prepended before splitting and removed again from the first
# chunk, which is the text before the first '[['
978 $a = explode( '[[', ' ' . $s );
979 $s = array_shift( $a );
980 $s = substr( $s, 1 );
982 # Match a link having the form [[namespace:link|alternate]]trail
984 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
985 # Match the end of a line for a word that's not followed by whitespace,
986 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
987 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
989 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
990 # Special and Media are pseudo-namespaces; no pages actually exist in them
992 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
994 if ( $useLinkPrefixExtension ) {
995 if ( preg_match( $e2, $s, $m ) ) {
996 $first_prefix = $m[2];
999 $first_prefix = false;
1005 wfProfileOut( $fname.'-setup' );
1007 # start processing each line
1008 foreach ( $a as $line ) {
1009 wfProfileIn( $fname.'-prefixhandling' );
1010 if ( $useLinkPrefixExtension ) {
1011 if ( preg_match( $e2, $s, $m ) ) {
1019 $prefix = $first_prefix;
1020 $first_prefix = false;
1023 wfProfileOut( $fname.'-prefixhandling' );
1025 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1027 # fix up urlencoded title texts
1028 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1030 } else { # Invalid form; output directly
1031 $s .= $prefix . '[[' . $line ;
1037 # :Foobar -- override special treatment of prefix (images, language links)
1038 # /Foobar -- convert to CurrentPage/Foobar
1039 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1041 # Look at the first character
1042 $c = substr($m[1],0,1);
1043 $noforce = ($c != ':');
1047 # / at end means we don't want the slash to be shown
1048 if(substr($m[1],-1,1)=='/') {
1049 $m[1]=substr($m[1],1,strlen($m[1])-2);
1052 $noslash=substr($m[1],1);
1055 # Some namespaces don't allow subpages
1056 if(!empty($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()])) {
1057 # subpages allowed here
1058 $link = $this->mTitle
->getPrefixedText(). '/' . trim($noslash);
1061 } # this might be changed for ugliness reasons
1063 # no subpage allowed, use standard link
1067 } elseif( $noforce ) { # no subpage
1070 # We don't want to keep the first character
1071 $link = substr( $m[1], 1 );
# Empty link text falls back to the link target
1074 $wasblank = ( '' == $text );
1075 if( $wasblank ) $text = $link;
1077 $nt = Title
::newFromText( $link );
1079 $s .= $prefix . '[[' . $line;
1083 $ns = $nt->getNamespace();
1084 $iw = $nt->getInterWiki();
1086 # Link not escaped by : , create the various objects
# Interlanguage link: recorded on the output object instead of being rendered inline
1090 if( $iw && $this->mOptions
->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1091 array_push( $this->mOutput
->mLanguageLinks
, $nt->getFullText() );
1092 $tmp = $prefix . $trail ;
1093 $s .= (trim($tmp) == '')?
'': $tmp;
1097 if ( $ns == NS_IMAGE
) {
1098 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1099 $wgLinkCache->addImageLinkObj( $nt );
1103 if ( $ns == NS_CATEGORY
) {
1104 $t = $nt->getText() ;
1105 $nnt = Title
::newFromText ( Namespace::getCanonicalName(NS_CATEGORY
).':'.$t ) ;
1107 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
# Temporarily switch off post-parse link colouring, restoring the previous setting afterwards
1108 $pPLC=$sk->postParseLinkColour();
1109 $sk->postParseLinkColour( false );
1110 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1111 $sk->postParseLinkColour( $pPLC );
1112 $wgLinkCache->resume();
1115 if ( $this->mTitle
->getNamespace() == NS_CATEGORY
) {
1116 $sortkey = $this->mTitle
->getText();
1118 $sortkey = $this->mTitle
->getPrefixedText();
1123 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1124 $this->mOutput
->mCategoryLinks
[] = $t ;
1125 $s .= $prefix . $trail ;
1130 if( ( $nt->getPrefixedText() === $this->mTitle
->getPrefixedText() ) &&
1131 ( strpos( $link, '#' ) === FALSE ) ) {
1132 # Self-links are handled specially; generally de-link and change to bold.
1133 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1137 if( $ns == NS_MEDIA
) {
1138 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1139 $wgLinkCache->addImageLinkObj( $nt );
1141 } elseif( $ns == NS_SPECIAL
) {
1142 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1145 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1147 wfProfileOut( $fname );
1151 # Some functions here used by doBlockLevels()
# If a section tag is currently open (mLastSection non-empty), builds its
# closing tag followed by a newline. In every case the "in <pre>" flag and
# the last-section name are reset.
1153 /* private */ function closeParagraph() {
1155 if ( '' != $this->mLastSection
) {
1156 $result = '</' . $this->mLastSection
. ">\n";
1158 $this->mInPre
= false;
1159 $this->mLastSection
= '';
1162 # getCommon() returns the length of the longest common substring
1163 # of both arguments, starting at the beginning of both.
# Walks both strings from the start, comparing byte by byte up to the
# length of the shorter one, and stops at the first difference; $i then
# holds the length of the common prefix.
1165 /* private */ function getCommon( $st1, $st2 ) {
1166 $fl = strlen( $st1 );
1167 $shorter = strlen( $st2 );
1168 if ( $fl < $shorter ) { $shorter = $fl; }
1170 for ( $i = 0; $i < $shorter; ++
$i ) {
# Square-bracket offsets: the curly-brace form $s{$i} is rejected by PHP 8
1171 if ( $st1[$i] != $st2[$i] ) { break; }
1175 # These next three functions open, continue, and close the list
1176 # element appropriate to the prefix character passed into them.
# Open a new list for the given prefix character.
# Any open paragraph is closed first and its closing tag leads the result.
# ';' also records that a <dt> is open. An unknown character replaces the
# whole result with an HTML comment error marker.
/* private */ function openList( $char ) {
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$html .= '<ul><li>';
			break;
		case '#':
			$html .= '<ol><li>';
			break;
		case ':':
			$html .= '<dl><dd>';
			break;
		case ';':
			$html .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			$html = '<!-- ERR 1 -->';
	}

	return $html;
}
# Move on to the next item of the list identified by the prefix character.
# '*' and '#' simply close and reopen an <li>. For ':' and ';' the tag that
# is closed depends on whether a <dt> is currently open, and mDTopen is
# updated to reflect the element that is opened. Any other character yields
# an HTML comment error marker.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	$closeTag = $this->mDTopen ? '</dt>' : '</dd>';
	$this->mDTopen = ( ';' == $char );

	return $closeTag . ( $this->mDTopen ? '<dt>' : '<dd>' );
}
# Build the closing markup for the list identified by the prefix character.
# '*' and '#' close the current item and the list. ':' closes a definition
# list, ending with </dt> (and clearing mDTopen) when a term is still open,
# otherwise with </dd>. Any other character, including ';', returns an HTML
# comment error marker.
1209 /* private */ function closeList( $char ) {
1210 if ( '*' == $char ) { $text = '</li></ul>'; }
1211 else if ( '#' == $char ) { $text = '</li></ol>'; }
1212 else if ( ':' == $char ) {
# A definition list may end while its term is still open.
1213 if ( $this->mDTopen
) {
1214 $this->mDTopen
= false;
1215 $text = '</dt></dl>';
1217 $text = '</dd></dl>';
1220 else { return '<!-- ERR 3 -->'; }
# Turn line-oriented wiki text into block-level HTML.
# $text is split on "\n" and handled one line at a time: leading runs of
# * # : ; become (nested) lists, lines starting with a space become <pre>
# blocks, and other lines are grouped into <p> paragraphs.
# $linestart: when false, the first line is appended to the output as-is.
# State is kept in mInPre, mLastSection and mDTopen across lines.
1224 /* private */ function doBlockLevels( $text, $linestart ) {
1225 $fname = 'Parser::doBlockLevels';
1226 wfProfileIn( $fname );
1228 # Parsing through the text line by line. The main thing
1229 # happening here is handling of block-level elements p, pre,
1230 # and making lists from lines starting with * # : etc.
1232 $textLines = explode( "\n", $text );
1234 $lastPrefix = $output = $lastLine = '';
1235 $this->mDTopen
= $inBlockElem = false;
# $paragraphStack holds a pending paragraph tag string, or false when none.
1237 $paragraphStack = false;
1239 if ( !$linestart ) {
1240 $output .= array_shift( $textLines );
1242 foreach ( $textLines as $oLine ) {
1243 $lastPrefixLength = strlen( $lastPrefix );
1244 $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1245 $preOpenMatch = preg_match('/<pre/i', $oLine );
1246 if ( !$this->mInPre
) {
1247 # Multiple prefixes may abut each other for nested lists.
1248 $prefixLength = strspn( $oLine, '*#:;' );
1249 $pref = substr( $oLine, 0, $prefixLength );
# $pref2 treats ';' as ':' so a term and its definition compare as the same list.
1252 $pref2 = str_replace( ';', ':', $pref );
1253 $t = substr( $oLine, $prefixLength );
1254 $this->mInPre
= !empty($preOpenMatch);
1256 # Don't interpret any other prefixes in preformatted text
1258 $pref = $pref2 = '';
1263 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1264 # Same as the last item, so no need to deal with nesting or opening stuff
1265 $output .= $this->nextItem( substr( $pref, -1 ) );
1266 $paragraphStack = false;
1268 if ( substr( $pref, -1 ) == ';') {
1269 # The one nasty exception: definition lists work like this:
1270 # ; title : definition text
1271 # So we check for : in the remainder text to split up the
1272 # title and definition, without b0rking links.
1273 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1274 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1276 $output .= $term . $this->nextItem( ':' );
1280 } elseif( $prefixLength ||
$lastPrefixLength ) {
1281 # Either open or close a level...
1282 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1283 $paragraphStack = false;
# Close the levels of the previous prefix that the new prefix does not share.
# NOTE(review): the $str{...} offset syntax used below is no longer accepted by PHP 8.
1285 while( $commonPrefixLength < $lastPrefixLength ) {
1286 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1287 --$lastPrefixLength;
1289 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1290 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open one list per prefix character beyond the shared part.
1292 while ( $prefixLength > $commonPrefixLength ) {
1293 $char = substr( $pref, $commonPrefixLength, 1 );
1294 $output .= $this->openList( $char );
1296 if ( ';' == $char ) {
1297 # FIXME: This is dupe of code above
1298 if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
1300 $output .= $term . $this->nextItem( ':' );
1304 ++
$commonPrefixLength;
1306 $lastPrefix = $pref2;
1308 if( 0 == $prefixLength ) {
1309 # No prefix (not in list)--go to paragraph mode
1310 $uniq_prefix = UNIQ_PREFIX
;
1311 // XXX: use a stack for nestable elements like span, table and div
1312 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1313 $closematch = preg_match(
1314 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1315 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1316 if ( $openmatch or $closematch ) {
1317 $paragraphStack = false;
1318 $output .= $this->closeParagraph();
1319 if($preOpenMatch and !$preCloseMatch) {
1320 $this->mInPre
= true;
1322 if ( $closematch ) {
1323 $inBlockElem = false;
1325 $inBlockElem = true;
1327 } else if ( !$inBlockElem && !$this->mInPre
) {
# NOTE(review): $t may be empty here (the blank-line test only comes further
# down), so reading offset 0 relies on PHP tolerating an out-of-range string
# offset - confirm.
1328 if ( ' ' == $t{0} and ( $this->mLastSection
== 'pre' or trim($t) != '' ) ) {
1330 if ($this->mLastSection
!= 'pre') {
1331 $paragraphStack = false;
1332 $output .= $this->closeParagraph().'<pre>';
1333 $this->mLastSection
= 'pre';
# Drop the single leading space that marked the line as preformatted.
1335 $t = substr( $t, 1 );
1338 if ( '' == trim($t) ) {
1339 if ( $paragraphStack ) {
1340 $output .= $paragraphStack.'<br />';
1341 $paragraphStack = false;
1342 $this->mLastSection
= 'p';
1344 if ($this->mLastSection
!= 'p' ) {
1345 $output .= $this->closeParagraph();
1346 $this->mLastSection
= '';
1347 $paragraphStack = '<p>';
1349 $paragraphStack = '</p><p>';
1353 if ( $paragraphStack ) {
1354 $output .= $paragraphStack;
1355 $paragraphStack = false;
1356 $this->mLastSection
= 'p';
1357 } else if ($this->mLastSection
!= 'p') {
1358 $output .= $this->closeParagraph().'<p>';
1359 $this->mLastSection
= 'p';
1365 if ($paragraphStack === false) {
# After the last line: close every list level that is still open ...
1369 while ( $prefixLength ) {
1370 $output .= $this->closeList( $pref2{$prefixLength-1} );
# ... and any paragraph-level section left open.
1373 if ( '' != $this->mLastSection
) {
1374 $output .= '</' . $this->mLastSection
. '>';
1375 $this->mLastSection
= '';
1378 wfProfileOut( $fname );
1382 # Return value of a magic variable (like PAGENAME)
# $index is one of the MAG_* identifiers. Date and number values are
# computed with date() and localised through $wgLang; page-related values
# are read from $this->mTitle.
1383 function getVariableValue( $index ) {
1384 global $wgLang, $wgSitename, $wgServer;
1387 case MAG_CURRENTMONTH
:
1388 return $wgLang->formatNum( date( 'm' ) );
1389 case MAG_CURRENTMONTHNAME
:
1390 return $wgLang->getMonthName( date('n') );
1391 case MAG_CURRENTMONTHNAMEGEN
:
1392 return $wgLang->getMonthNameGen( date('n') );
1393 case MAG_CURRENTDAY
:
1394 return $wgLang->formatNum( date('j') );
1396 return $this->mTitle
->getText();
1398 return $this->mTitle
->getPartialURL();
1400 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1401 return $wgLang->getNsText($this->mTitle
->getNamespace()); # Patch by Dori
1402 case MAG_CURRENTDAYNAME
:
# date('w') is 0 (Sunday) to 6; the +1 shifts it to 1..7.
# Presumably getWeekdayName() is 1-based - confirm.
1403 return $wgLang->getWeekdayName( date('w')+
1 );
1404 case MAG_CURRENTYEAR
:
1405 return $wgLang->formatNum( date( 'Y' ) );
1406 case MAG_CURRENTTIME
:
1407 return $wgLang->time( wfTimestampNow(), false );
1408 case MAG_NUMBEROFARTICLES
:
1409 return $wgLang->formatNum( wfNumberOfArticles() );
# (Re)build $this->mVariables, the lookup table of magic variables such as
# CURRENTMONTHNAME. For every ID in $wgVariableIDs the current value is
# computed and the matching MagicWord adds its entries to the table.
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
# Expand the {{...}} and {{{...}}} constructs in $text.
# $args: template arguments for this level; pushed on mArgStack for the
# duration of the call and popped again at the end.
# Magic variables are substituted for OT_HTML and OT_MSG output, {{{argument}}}
# references only for OT_HTML, and {{...}} is handed to wfBraceSubstitution.
1429 /* private */ function replaceVariables( $text, $args = array() ) {
1430 global $wgLang, $wgScript, $wgArticlePath;
1432 # Prevent too big inclusions
1433 if(strlen($text)> MAX_INCLUDE_SIZE
)
1436 $fname = 'Parser::replaceVariables';
1437 wfProfileIn( $fname );
# Character classes for the regexes below; $nonBraceChars is the set of legal
# title characters with '{' and '}' removed.
1440 $titleChars = Title
::legalChars();
1441 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1443 # This function is called recursively. To keep track of arguments we need a stack:
1444 array_push( $this->mArgStack
, $args );
1446 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
# Presumably the wf*Substitution callbacks named below reach this parser
# through $wgCurParser - confirm against their definitions.
1447 $GLOBALS['wgCurParser'] =& $this;
1449 if ( $this->mOutputType
== OT_HTML ||
$this->mOutputType
== OT_MSG
) {
1450 # Variable substitution
1451 $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1454 if ( $this->mOutputType
== OT_HTML
) {
1455 # Argument substitution
1456 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1458 # Template substitution
# Groups: (1) optional newline, (2) the part before the first '|', (3) the
# argument string, which is empty or starts with '|'.
1459 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1460 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1462 array_pop( $this->mArgStack
);
1464 wfProfileOut( $fname );
# Replacement callback for {{VARIABLE}} matches.
# $matches[0] is the whole match and $matches[1] the text between the braces.
# A known magic variable is replaced by its value and the output is flagged
# as containing old magic; anything else is returned unchanged.
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
# Split template arguments
# $argsString is either '' (no arguments) or the raw argument text including
# its leading '|'. Returns the arguments as a list of strings.
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );

	# A piece with unbalanced '[[' / ']]' was cut in the middle of a link: the
	# '|' that followed it belongs to the link syntax, not to the template
	# parameter syntax. Glue it back onto the next piece and examine the
	# merged piece again.
	$pos = 0;
	$remaining = count( $pieces );
	while ( $pos < $remaining - 1 ) {
		$opened = substr_count( $pieces[$pos], '[[' );
		$closed = substr_count( $pieces[$pos], ']]' );
		if ( $opened != $closed ) {
			$pieces[$pos] .= '|' . $pieces[$pos + 1];
			array_splice( $pieces, $pos + 1, 1 );
			--$remaining;
		} else {
			++$pos;
		}
	}

	return $pieces;
}
# Replacement callback for {{...}} matches found by replaceVariables().
# $matches[1] is an optional leading newline, $matches[2] the part before the
# first '|' and $matches[3] the raw argument string.
# The visible handlers are tried in this order: subst:, msgnw:/msg:/int:,
# ns:, localurl:/localurle:, magic variables, grammar:, and finally template
# inclusion (cached in mTemplates, loop-checked through mTemplatePath).
# For HTML output a found template is parsed recursively via stripParse().
1506 function braceSubstitution( $matches ) {
1507 global $wgLinkCache, $wgLang;
1508 $fname = 'Parser::braceSubstitution';
1515 # $newline is an optional newline character before the braces
1516 # $part1 is the bit before the first |, and must contain only title characters
1517 # $args is a list of arguments, starting from index 0, not including $part1
1519 $newline = $matches[1];
1520 $part1 = $matches[2];
1521 # If the third subpattern matched anything, it will start with |
1523 $args = $this->getTemplateArgs($matches[3]);
1524 $argc = count( $args );
# A match that still contains '{{{' is passed through unchanged.
1527 if ( strpos( $matches[0], '{{{' ) !== false ) {
1528 $text = $matches[0];
1535 $mwSubst =& MagicWord
::get( MAG_SUBST
);
1536 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1537 if ( $this->mOutputType
!= OT_WIKI
) {
1538 # Invalid SUBST not replaced at PST time
1539 # Return without further processing
1540 $text = $matches[0];
1544 } elseif ( $this->mOutputType
== OT_WIKI
) {
1545 # SUBST not found in PST pass, do nothing
1546 $text = $matches[0];
1551 # MSG, MSGNW and INT
1554 $mwMsgnw =& MagicWord
::get( MAG_MSGNW
);
1555 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1558 # Remove obsolete MSG:
1559 $mwMsg =& MagicWord
::get( MAG_MSG
);
1560 $mwMsg->matchStartAndRemove( $part1 );
1563 # Check if it is an internal message
1564 $mwInt =& MagicWord
::get( MAG_INT
);
1565 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1566 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1567 $text = wfMsgReal( $part1, $args, true );
1575 # Check for NS: (namespace expansion)
1576 $mwNs = MagicWord
::get( MAG_NS
);
1577 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero numeric argument is used as a namespace index directly; anything
# else is looked up as a canonical namespace name.
1578 if ( intval( $part1 ) ) {
1579 $text = $wgLang->getNsText( intval( $part1 ) );
1582 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1583 if ( !is_null( $index ) ) {
1584 $text = $wgLang->getNsText( $index );
1591 # LOCALURL and LOCALURLE
1593 $mwLocal = MagicWord
::get( MAG_LOCALURL
);
1594 $mwLocalE = MagicWord
::get( MAG_LOCALURLE
);
1596 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1597 $func = 'getLocalURL';
1598 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1599 $func = 'escapeLocalURL';
1604 if ( $func !== '' ) {
1605 $title = Title
::newFromText( $part1 );
1606 if ( !is_null( $title ) ) {
# $func names the Title method to call, with or without the first argument.
1608 $text = $title->$func( $args[0] );
1610 $text = $title->$func();
1617 # Internal variables
1618 if ( !$this->mVariables
) {
1619 $this->initialiseVariables();
1621 if ( !$found && array_key_exists( $part1, $this->mVariables
) ) {
1622 $text = $this->mVariables
[$part1];
1624 $this->mOutput
->mContainsOldMagic
= true;
1628 if ( !$found && $argc == 1 ) {
1629 $mwGrammar =& MagicWord
::get( MAG_GRAMMAR
);
1630 if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1631 $text = $wgLang->convertGrammar( $args[0], $part1 );
1636 # Template table test
1638 # Did we encounter this template already? If yes, it is in the cache
1639 # and we need to check for loops.
1640 if ( isset( $this->mTemplates
[$part1] ) ) {
1641 # Infinite loop test
1642 if ( isset( $this->mTemplatePath
[$part1] ) ) {
1646 # set $text to cached message.
1647 $text = $this->mTemplates
[$part1];
1651 # Load from database
1653 $title = Title
::newFromText( $part1, NS_TEMPLATE
);
1654 if ( !is_null( $title ) && !$title->isExternal() ) {
1655 # Check for excessive inclusion
1656 $dbk = $title->getPrefixedDBkey();
1657 if ( $this->incrementIncludeCount( $dbk ) ) {
# NOTE(review): the next comment looks stale - this branch is the normal path
# whenever the include limit has not been exceeded.
1658 # This should never be reached.
1659 $article = new Article( $title );
1660 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1661 if ( $articleContent !== false ) {
1663 $text = $articleContent;
1667 # If the title is valid but undisplayable, make a link to it
1668 if ( $this->mOutputType
== OT_HTML
&& !$found ) {
1669 $text = '[['.$title->getPrefixedText().']]';
1673 # Template cache array insertion
1674 $this->mTemplates
[$part1] = $text;
1678 # Recursive parsing, escaping and link table handling
1679 # Only for HTML output
1680 if ( $nowiki && $found && $this->mOutputType
== OT_HTML
) {
1681 $text = wfEscapeWikiText( $text );
1682 } elseif ( $this->mOutputType
== OT_HTML
&& $found && !$noparse) {
1683 # Clean up argument array
# Arguments without '=' are stored positionally; "name=value" arguments are
# stored under their trimmed name.
1684 $assocArgs = array();
1686 foreach( $args as $arg ) {
1687 $eqpos = strpos( $arg, '=' );
1688 if ( $eqpos === false ) {
1689 $assocArgs[$index++
] = $arg;
1691 $name = trim( substr( $arg, 0, $eqpos ) );
1692 $value = trim( substr( $arg, $eqpos+
1 ) );
1693 if ( $value === false ) {
1696 if ( $name !== false ) {
1697 $assocArgs[$name] = $value;
1702 # Do not enter included links in link table
1703 if ( !is_null( $title ) ) {
1704 $wgLinkCache->suspend();
1707 # Add a new element to the template recursion path
1708 $this->mTemplatePath
[$part1] = 1;
1710 $text = $this->stripParse( $text, $newline, $assocArgs );
1712 # Resume the link cache and register the inclusion as a link
1713 if ( !is_null( $title ) ) {
1714 $wgLinkCache->resume();
1715 $wgLinkCache->addLinkObj( $title );
1719 # Empties the template path
1720 $this->mTemplatePath
= array();
# Triple brace replacement -- used for template arguments
# $matches[1] is an optional leading newline and $matches[2] the argument
# name. When the innermost argument set on mArgStack defines that name, its
# value is run through stripParse(); otherwise the match is left untouched.
function argSubstitution( $matches ) {
	$argName = trim( $matches[2] );
	$currentArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $currentArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
# Count one more inclusion of the entity identified by $dbk.
# Returns true if the function is allowed to include this entity, i.e. while
# its counter has not gone above MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk ) {
	$count = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk] + 1
		: 1;
	$this->mIncludeCount[$dbk] = $count;

	return $count <= MAX_INCLUDE_REPEAT;
}
1756 # Cleans up HTML, removes dangerous tags and attributes
# The text is split on '<'. Each piece is matched as a tag and kept as a tag
# only if its name is in the whitelist built below, with its attributes run
# through fixTagAttributes(). Pieces that are not kept as tags are appended
# after a literal '<' prefix.
1757 /* private */ function removeHTMLtags( $text ) {
1758 global $wgUseTidy, $wgUserHtml;
1759 $fname = 'Parser::removeHTMLtags';
1760 wfProfileIn( $fname );
1763 $htmlpairs = array( # Tags that must be closed
1764 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1765 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1766 'strike', 'strong', 'tt', 'var', 'div', 'center',
1767 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1768 'ruby', 'rt' , 'rb' , 'rp', 'p'
1770 $htmlsingle = array(
1771 'br', 'hr', 'li', 'dt', 'dd'
1773 $htmlnest = array( # Tags that can be nested--??
1774 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1775 'dl', 'font', 'big', 'small', 'sub', 'sup'
1777 $tabletags = array( # Can only appear inside table
# The four whitelists are emptied here - presumably when $wgUserHtml is
# disabled; confirm against the surrounding condition.
1781 $htmlpairs = array();
1782 $htmlsingle = array();
1783 $htmlnest = array();
1784 $tabletags = array();
1787 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1788 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1790 $htmlattrs = $this->getHTMLattrs () ;
1792 # Remove HTML comments
1793 $text = preg_replace( '/(\\n *<!--.*--> *|<!--.*?-->)/sU', '', $text );
# Everything before the first '<' is kept as-is; each later piece starts just
# after a '<'.
1795 $bits = explode( '<', $text );
1796 $text = array_shift( $bits );
# $tagstack tracks open tags; $tablestack saves the outer stack while inside a table.
1798 $tagstack = array(); $tablestack = array();
1799 foreach ( $bits as $x ) {
1800 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1801 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1803 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1804 error_reporting( $prev );
1807 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1811 if ( ! in_array( $t, $htmlsingle ) &&
1812 ( $ot = @array_pop
( $tagstack ) ) != $t ) {
1813 @array_push
( $tagstack, $ot );
1816 if ( $t == 'table' ) {
1817 $tagstack = array_pop( $tablestack );
1822 # Keep track for later
1823 if ( in_array( $t, $tabletags ) &&
1824 ! in_array( 'table', $tagstack ) ) {
1826 } else if ( in_array( $t, $tagstack ) &&
1827 ! in_array ( $t , $htmlnest ) ) {
1829 } else if ( ! in_array( $t, $htmlsingle ) ) {
1830 if ( $t == 'table' ) {
1831 array_push( $tablestack, $tagstack );
1832 $tagstack = array();
1834 array_push( $tagstack, $t );
1836 # Strip non-approved attributes from the tag
1837 $newparams = $this->fixTagAttributes($params);
# NOTE(review): search and replacement are the same string in this
# str_replace() call and the three like it below, so they change nothing.
# Presumably the replacement was meant to be an escaped form of '>' - confirm.
1841 $rest = str_replace( '>', '>', $rest );
1842 $text .= "<$slash$t $newparams$brace$rest";
1846 $text .= '<' . str_replace( '>', '>', $x);
1848 # Close off any remaining tags
1849 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1851 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
# Second variant of the loop, without tag-stack bookkeeping - presumably the
# path taken when $wgUseTidy is enabled; confirm.
1854 # this might be possible using tidy itself
1855 foreach ( $bits as $x ) {
1856 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1858 @list
( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1859 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1860 $newparams = $this->fixTagAttributes($params);
1861 $rest = str_replace( '>', '>', $rest );
1862 $text .= "<$slash$t $newparams$brace$rest";
1864 $text .= '<' . str_replace( '>', '>', $x);
1868 wfProfileOut( $fname );
1873 # This function accomplishes several tasks:
1874 # 1) Auto-number headings if that option is enabled
1875 # 2) Add an [edit] link to sections for logged in users who have enabled the option
1876 # 3) Add a Table of contents on the top for users who have enabled the option
1877 # 4) Auto-anchor headings
1879 # It loops through all headlines, collects the necessary data, then splits up the
1880 # string and re-inserts the newly formatted headlines.
# $text: HTML in which headings are already <h1>..<h6> elements.
# $isMain: the TOC is only inserted at the top when this is true.
# The __NOEDITSECTION__, __NOTOC__, __TOC__ and __FORCETOC__ magic words in
# the text are honoured; remaining __TOC__ markers are replaced by the
# generated table of contents in the returned text.
1881 /* private */ function formatHeadings( $text, $isMain=true ) {
1882 global $wgInputEncoding, $wgMaxTocLevel, $wgLang;
1884 $doNumberHeadings = $this->mOptions
->getNumberHeadings();
1885 $doShowToc = $this->mOptions
->getShowToc();
1886 $forceTocHere = false;
1887 if( !$this->mTitle
->userCanEdit() ) {
1889 $rightClickHack = 0;
1891 $showEditLink = $this->mOptions
->getEditSection();
1892 $rightClickHack = $this->mOptions
->getEditSectionOnRightClick();
1895 # Inhibit editsection links if requested in the page
1896 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1897 if( $esw->matchAndRemove( $text ) ) {
1900 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1902 $mw =& MagicWord
::get( MAG_NOTOC
);
1903 if( $mw->matchAndRemove( $text ) ) {
1907 # never add the TOC to the Main Page. This is an entry page that should not
1908 # be more than 1-2 screens large anyway
1909 if( $this->mTitle
->getPrefixedText() == wfMsg('mainpage') ) {
1913 # Get all headlines for numbering them and adding funky stuff like [edit]
1914 # links - this is for later, but we need the number of headlines right now
# $matches[1] = heading level, [2] = rest of the opening tag up to and
# including '>', [3] = heading content.
1915 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
1917 # if there are fewer than 4 headlines in the article, do not show TOC
1918 if( $numMatches < 4 ) {
1922 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
1923 # override above conditions and always show TOC at that place
1924 $mw =& MagicWord
::get( MAG_TOC
);
1925 if ($mw->match( $text ) ) {
1927 $forceTocHere = true;
1929 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1930 # override above conditions and always show TOC above first header
1931 $mw =& MagicWord
::get( MAG_FORCETOC
);
1932 if ($mw->matchAndRemove( $text ) ) {
1939 # We need this to perform operations on the HTML
1940 $sk =& $this->mOptions
->getSkin();
1945 # Ugh .. the TOC should have neat indentation levels which can be
1946 # passed to the skin functions. These are determined here
1951 $sublevelCount = array();
1954 foreach( $matches[3] as $headline ) {
1957 $prevlevel = $level;
1959 $level = $matches[1][$headlineCount];
1960 if( ( $doNumberHeadings ||
$doShowToc ) && $prevlevel && $level > $prevlevel ) {
1961 # reset when we enter a new level
1962 $sublevelCount[$level] = 0;
1963 $toc .= $sk->tocIndent( $level - $prevlevel );
1964 $toclevel +
= $level - $prevlevel;
1966 if( ( $doNumberHeadings ||
$doShowToc ) && $level < $prevlevel ) {
1967 # reset when we step back a level
1968 $sublevelCount[$level+
1]=0;
1969 $toc .= $sk->tocUnindent( $prevlevel - $level );
1970 $toclevel -= $prevlevel - $level;
1972 # count number of headlines for each level
1973 @$sublevelCount[$level]++
;
1974 if( $doNumberHeadings ||
$doShowToc ) {
1976 for( $i = 1; $i <= $level; $i++
) {
1977 if( !empty( $sublevelCount[$i] ) ) {
1981 $numbering .= $wgLang->formatNum( $sublevelCount[$i] );
1987 # The canonized header is a version of the header text safe to use for links
1988 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
# NOTE(review): the second call below takes $headline again, so the result of
# unstrip() on the first line is overwritten and never used. Presumably the
# second call was meant to receive $canonized_headline - confirm.
1989 $canonized_headline = $this->unstrip( $headline, $this->mStripState
);
1990 $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState
);
1992 # Remove link placeholders by the link text.
1993 # <!--LINK namespace page_title link text with suffix-->
1995 # link text with suffix
1996 $canonized_headline = preg_replace( '/<!--LINK [0-9]* [^ ]* *(.*?)-->/','$1', $canonized_headline );
# Strip any remaining tags; what is left becomes the TOC line and, after
# encoding, the anchor name.
1998 $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
1999 $tocline = trim( $canonized_headline );
2000 $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT
, $wgInputEncoding ) );
2001 $replacearray = array(
2005 $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2006 $refer[$headlineCount] = $canonized_headline;
2008 # count how many in assoc. array so we can track dupes in anchors
2009 @$refers[$canonized_headline]++
;
2010 $refcount[$headlineCount]=$refers[$canonized_headline];
2012 # Prepend the number to the heading text
2014 if( $doNumberHeadings ||
$doShowToc ) {
2015 $tocline = $numbering . ' ' . $tocline;
2017 # Don't number the heading if it is the only one (looks silly)
2018 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2019 # the two are different if the line contains a link
2020 $headline=$numbering . ' ' . $headline;
2024 # Create the anchor for linking from the TOC to the section
# Repeated headings get "_2", "_3", ... appended so anchors stay unique.
2025 $anchor = $canonized_headline;
2026 if($refcount[$headlineCount] > 1 ) {
2027 $anchor .= '_' . $refcount[$headlineCount];
2029 if( $doShowToc && ( !isset($wgMaxTocLevel) ||
$toclevel<$wgMaxTocLevel ) ) {
2030 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2032 if( $showEditLink ) {
2033 if ( empty( $head[$headlineCount] ) ) {
2034 $head[$headlineCount] = '';
2036 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+
1);
2039 # Add the edit section span
2040 if( $rightClickHack ) {
2041 $headline = $sk->editSectionScript($headlineCount+
1,$headline);
2044 # give headline the correct <h#> tag
2045 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2051 $toclines = $headlineCount;
2052 $toc .= $sk->tocUnindent( $toclevel );
2053 $toc = $sk->tocTable( $toc );
2056 # split up and insert constructed headlines
2058 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2061 foreach( $blocks as $block ) {
2062 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2063 # This is the [edit] link that appears for the top block of text when
2064 # section editing is enabled
2066 # Disabled because it broke block formatting
2067 # For example, a bullet point in the top line
2068 # $full .= $sk->editSectionLink(0);
2071 if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2072 # Top anchor now in skin
2076 if( !empty( $head[$i] ) ) {
2082 $mw =& MagicWord
::get( MAG_TOC
);
2083 return $mw->replace( $toc, $full );
2089 # Return an HTML link for the "ISBN 123456" text
# The text is split at every "ISBN " marker. For each following piece the
# leading blanks and then the run of characters from $valid are consumed;
# the collected number (hyphens and spaces removed) becomes the isbn= query
# of a link to Special:Booksources.
2090 /* private */ function magicISBN( $text ) {
2092 $fname = 'Parser::magicISBN';
2093 wfProfileIn( $fname );
# A space is prepended so that text starting with "ISBN " still yields a
# leading piece; it is removed again by the substr() below.
# NOTE(review): split() was removed in PHP 7 and the $x{0} offset syntax is
# rejected by PHP 8.
2095 $a = split( 'ISBN ', ' '.$text );
2096 if ( count ( $a ) < 2 ) {
2097 wfProfileOut( $fname );
2100 $text = substr( array_shift( $a ), 1);
2101 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2103 foreach ( $a as $x ) {
2104 $isbn = $blank = '' ;
2105 while ( ' ' == $x{0} ) {
2107 $x = substr( $x, 1 );
2109 if ( $x == '' ) { # blank isbn
2110 $text .= "ISBN $blank";
2113 while ( strstr( $valid, $x{0} ) != false ) {
2115 $x = substr( $x, 1 );
2117 $num = str_replace( '-', '', $isbn );
2118 $num = str_replace( ' ', '', $num );
2121 $text .= "ISBN $blank$x";
2123 $titleObj = Title
::makeTitle( NS_SPECIAL
, 'Booksources' );
2124 $text .= '<a href="' .
2125 $titleObj->escapeLocalUrl( 'isbn='.$num ) .
2126 "\" class=\"internal\">ISBN $isbn</a>";
2130 wfProfileOut( $fname );
2134 # Return an HTML link for the "GEO ..." text
# Degree/minute/second coordinates in the U.S. Census page format are first
# rewritten to "(GEO +lat:lon)" form. The text is then split at every "GEO "
# marker and each coordinate run (characters from $valid) is turned into a
# link to Special:Geo with a coordinates= query.
2135 /* private */ function magicGEO( $text ) {
2136 global $wgLang, $wgUseGeoMode;
2137 $fname = 'Parser::magicGEO';
2138 wfProfileIn( $fname );
2140 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2141 $directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
# NOTE(review): the two 'S' (South) replacements below produce the same
# "+lat" sign as the 'N' ones, so southern latitudes come out positive -
# confirm whether that is intended.
2142 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2143 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['N']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2144 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2145 $text = preg_replace ( "/(\d+)°(\d+)'(\d+)\" {$directions['S']}, (\d+)°(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
# NOTE(review): split() was removed in PHP 7 and the $x{0} offset syntax is
# rejected by PHP 8.
2147 $a = split( 'GEO ', ' '.$text );
2148 if ( count ( $a ) < 2 ) {
2149 wfProfileOut( $fname );
2152 $text = substr( array_shift( $a ), 1);
2153 $valid = '0123456789.+-:';
2155 foreach ( $a as $x ) {
2156 $geo = $blank = '' ;
2157 while ( ' ' == $x{0} ) {
2159 $x = substr( $x, 1 );
2161 while ( strstr( $valid, $x{0} ) != false ) {
2163 $x = substr( $x, 1 );
2165 $num = str_replace( '+', '', $geo );
2166 $num = str_replace( ' ', '', $num );
# Without at least two ':'-separated parts the text is left as plain "GEO ...".
2168 if ( '' == $num ||
count ( explode ( ':' , $num , 3 ) ) < 2 ) {
2169 $text .= "GEO $blank$x";
2171 $titleObj = Title
::makeTitle( NS_SPECIAL
, 'Geo' );
2172 $text .= '<a href="' .
2173 $titleObj->escapeLocalUrl( 'coordinates='.$num ) .
2174 "\" class=\"internal\">GEO $geo</a>";
2178 wfProfileOut( $fname );
2182 # Return an HTML link for the "RFC 1234" text
# The text is split at every "RFC " marker. For each following piece the
# leading blanks and then the run of digits are consumed; the link target is
# the 'rfcurl' message with $1 replaced by the RFC number.
2183 /* private */ function magicRFC( $text ) {
# NOTE(review): split() was removed in PHP 7 and the $x{0} offset syntax is
# rejected by PHP 8.
2186 $a = split( 'RFC ', ' '.$text );
2187 if ( count ( $a ) < 2 ) return $text;
2188 $text = substr( array_shift( $a ), 1);
2189 $valid = '0123456789';
2191 foreach ( $a as $x ) {
2192 $rfc = $blank = '' ;
2193 while ( ' ' == $x{0} ) {
2195 $x = substr( $x, 1 );
2197 while ( strstr( $valid, $x{0} ) != false ) {
2199 $x = substr( $x, 1 );
2203 $text .= "RFC $blank$x";
2205 $url = wfmsg( 'rfcurl' );
2206 $url = str_replace( '$1', $rfc, $url);
2207 $sk =& $this->mOptions
->getSkin();
2208 $la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
2209 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
# Transform wiki markup before it is saved; the output type is OT_WIKI, so
# the result is altered wiki markup rather than HTML.
# $text: the submitted wikitext
# $title, $user: page and user (by reference); $user is passed to pstPass2()
# $options: parser options stored in mOptions
# $clearState: when true (the default) parser state is reset first
2215 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2216 $this->mOptions
= $options;
2217 $this->mTitle
=& $title;
2218 $this->mOutputType
= OT_WIKI
;
2220 if ( $clearState ) {
2221 $this->clearState();
2224 $stripState = false;
# Two normalisation passes: literal string pairs first, then regex pairs
# that rewrite <br> variants.
2228 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2232 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2233 "/<br *?>/i" => "<br />",
2235 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
# Protected sections are stripped out before the second pass and put back
# afterwards.
2237 $text = $this->strip( $text, $stripState, false );
2238 $text = $this->pstPass2( $text, $user );
2239 $text = $this->unstrip( $text, $stripState );
2240 $text = $this->unstripNoWiki( $text, $stripState );
# Second pass of the pre-save transform.
# Expands {{subst:...}}, replaces the ~~~ / ~~~~ / ~~~~~ signature markers,
# completes "pipe trick" links such as [[page (context)|]] and [[|page]],
# trims trailing whitespace and removes the __END__ marker.
#
# $text: wikitext, already stripped of protected sections by the caller
# $user: the saving user (name and 'nickname' option feed the signature)
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	# Note: this is an ugly timezone hack for the European wikis.
	# TZ is switched to the configured local zone while the date is formatted.
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the zone saved above. This used to read the undefined
		# variable $oldtzs, which reset TZ to an empty value instead.
		putenv( 'TZ=' . $oldtz );
	}

	# Longest marker first, so ~~~~~ is not consumed by the shorter patterns.
	$text = preg_replace( '/~~~~~/', $d, $text );
	$text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
	$text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]", $text );

	# Context links: [[|name]] and [[name (context)|]]
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any.
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Prime the members that parse() normally initialises (title, options and
# output type) so that code outside the parser can call individual class
# members safely. The parser state is reset unless $clearState is false.
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
# Expand the variables and templates in an interface message.
# The parser is pointed at $wgTitle, switched to OT_MSG output, reset, and
# $text is run through replaceVariables().
# $options: parser options stored in mOptions for the duration.
2322 function transformMsg( $text, $options ) {
# Static flag shared by all calls; used by the recursion guard below.
2324 static $executing = false;
2326 # Guard against infinite recursion
2332 $this->mTitle
= $wgTitle;
2333 $this->mOptions
= $options;
2334 $this->mOutputType
= OT_MSG
;
2335 $this->clearState();
2336 $text = $this->replaceVariables( $text );
2342 # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2343 # Callback will be called with the text within
2344 # Transform and return the text within
#   $tag      - tag name, used as the key into $this->mTagHooks
#   $callback - handler stored for $tag, replacing any handler already registered
2345 function setHook( $tag, $callback ) {
# @ silences the notice PHP raises when no hook exists for $tag yet.
# NOTE(review): $oldVal is captured before the overwrite, presumably so it can be
# handed back to the caller - confirm.
2346 $oldVal = @$this->mTagHooks
[$tag];
2347 $this->mTagHooks
[$tag] = $callback;
2354 * @package MediaWiki
# Output text plus the link lists and old-magic flag supplied to the constructor below
2358 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
2359 var $mCacheTime; # Used in ParserCache
# Presumably the PHP 4-style constructor of the enclosing class (same-name method) - confirm.
#   $text             - initial value of mText (default '')
#   $languageLinks    - initial value of mLanguageLinks (default empty array)
#   $categoryLinks    - initial value of mCategoryLinks (default empty array)
#   $containsOldMagic - initial value of mContainsOldMagic (default false)
# mCacheTime always starts as the empty string; it has no constructor parameter.
2361 function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
2362 $containsOldMagic = false )
2364 $this->mText
= $text;
2365 $this->mLanguageLinks
= $languageLinks;
2366 $this->mCategoryLinks
= $categoryLinks;
2367 $this->mContainsOldMagic
= $containsOldMagic;
2368 $this->mCacheTime
= '';
# Read accessors: each returns the corresponding m* member unchanged.
# Note that containsOldMagic() is the getter for mContainsOldMagic (no "get" prefix).
2371 function getText() { return $this->mText
; }
2372 function getLanguageLinks() { return $this->mLanguageLinks
; }
2373 function getCategoryLinks() { return $this->mCategoryLinks
; }
2374 function getCacheTime() { return $this->mCacheTime
; }
2375 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Write accessors: each passes the member and the new value to wfSetVar() and
# returns whatever wfSetVar() returns.
# NOTE(review): wfSetVar() is defined elsewhere; presumably it assigns the new
# value and returns the previous one - confirm before relying on the return value.
2376 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
2377 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
2378 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
2379 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
2380 function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime
, $t ); }
# Fold the link data and old-magic flag of another output object into this one.
#
#   $other - object exposing mLanguageLinks, mCategoryLinks and mContainsOldMagic
#            (in practice another instance of this class)
#
# Language links and category links are each appended to this object's own list
# via array_merge(); mContainsOldMagic becomes true if it is true on either side.
# mText and mCacheTime are left untouched.
#
# Fix: the category list was previously merged with $this->mLanguageLinks, which
# dropped $other's categories and copied this object's language links into its
# category list. It now merges $other->mCategoryLinks, mirroring the line above.
function merge( $other ) {
	$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
	$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
	$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
}
2391 * Set options of the Parser
2393 * @package MediaWiki
2397 # All variables are private
2398 var $mUseTeX; # Use texvc to expand <math> tags
2399 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
2400 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
2401 var $mAllowExternalImages; # Allow external images inline
2402 var $mSkin; # Reference to the preferred skin
2403 var $mDateFormat; # Date format index
2404 var $mEditSection; # Create "edit section" links
2405 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
2406 var $mNumberHeadings; # Automatically number headings
2407 var $mShowToc; # Show table of contents
# Read accessors: each returns the corresponding m* option member unchanged.
2409 function getUseTeX() { return $this->mUseTeX
; }
2410 function getUseDynamicDates() { return $this->mUseDynamicDates
; }
2411 function getInterwikiMagic() { return $this->mInterwikiMagic
; }
2412 function getAllowExternalImages() { return $this->mAllowExternalImages
; }
2413 function getSkin() { return $this->mSkin
; }
2414 function getDateFormat() { return $this->mDateFormat
; }
2415 function getEditSection() { return $this->mEditSection
; }
2416 function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick
; }
2417 function getNumberHeadings() { return $this->mNumberHeadings
; }
2418 function getShowToc() { return $this->mShowToc
; }
# Write accessors: each passes the option member and the new value to wfSetVar()
# and returns whatever wfSetVar() returns.
# NOTE(review): wfSetVar() is defined elsewhere; presumably it assigns the new
# value and returns the previous one - confirm before relying on the return value.
2420 function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX
, $x ); }
2421 function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates
, $x ); }
2422 function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic
, $x ); }
2423 function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages
, $x ); }
2424 function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat
, $x ); }
2425 function setEditSection( $x ) { return wfSetVar( $this->mEditSection
, $x ); }
2426 function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick
, $x ); }
2427 function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings
, $x ); }
2428 function setShowToc( $x ) { return wfSetVar( $this->mShowToc
, $x ); }
# setSkin differs from the setters above: it binds mSkin by reference to the
# caller's variable, does not go through wfSetVar(), and returns nothing.
2430 function setSkin( &$x ) { $this->mSkin
=& $x; }
2432 # Get parser options
# Factory: creates a fresh ParserOptions and fills it via initialiseFromUser().
#   $user - passed by reference straight through to initialiseFromUser()
# Marked "static" by comment only; it does not touch $this.
# NOTE(review): $popts is presumably returned to the caller - confirm.
2433 /* static */ function newFromUser( &$user ) {
2434 $popts = new ParserOptions
;
2435 $popts->initialiseFromUser( $user );
# Populate every option member of this object.
# Site-wide switches (TeX, dynamic dates, interwiki magic, external images) are
# copied from the $wg* globals; the skin and the display preferences come from a
# user object via getSkin() / getOption().
#   $userInput - user object, taken by reference; a falsy value selects the
#                fallback branch below
# The whole call is profiled under $fname, with the skin lookup as a nested section.
2440 function initialiseFromUser( &$userInput ) {
2441 global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
2443 $fname = 'ParserOptions::initialiseFromUser';
2444 wfProfileIn( $fname );
# Falsy $userInput: a substitute $user is marked as loaded.
# NOTE(review): presumably a freshly constructed blank user - confirm.
2445 if ( !$userInput ) {
2447 $user->setLoaded( true );
# Otherwise work directly on the caller's object (reference, not a copy)
2449 $user =& $userInput;
2452 $this->mUseTeX
= $wgUseTeX;
2453 $this->mUseDynamicDates
= $wgUseDynamicDates;
2454 $this->mInterwikiMagic
= $wgInterwikiMagic;
2455 $this->mAllowExternalImages
= $wgAllowExternalImages;
# Skin lookup gets its own profiling section, "<fname>-skin"
2456 wfProfileIn( $fname.'-skin' );
2457 $this->mSkin
=& $user->getSkin();
2458 wfProfileOut( $fname.'-skin' );
# Remaining members mirror the user's stored preferences, keyed by option name
2459 $this->mDateFormat
= $user->getOption( 'date' );
2460 $this->mEditSection
= $user->getOption( 'editsection' );
2461 $this->mEditSectionOnRightClick
= $user->getOption( 'editsectiononrightclick' );
2462 $this->mNumberHeadings
= $user->getOption( 'numberheadings' );
2463 $this->mShowToc
= $user->getOption( 'showtoc' );
2464 wfProfileOut( $fname );
2470 # Regex callbacks, used in Parser::replaceVariables
# Plain-function trampoline: forwards $matches to braceSubstitution() on the
# parser currently held in the global $wgCurParser and returns its result.
2471 function wfBraceSubstitution( $matches ) {
2472 global $wgCurParser;
2473 return $wgCurParser->braceSubstitution( $matches );
# Plain-function trampoline: forwards $matches to argSubstitution() on the
# parser currently held in the global $wgCurParser and returns its result.
2476 function wfArgSubstitution( $matches ) {
2477 global $wgCurParser;
2478 return $wgCurParser->argSubstitution( $matches );
# Plain-function trampoline: forwards $matches to variableSubstitution() on the
# parser currently held in the global $wgCurParser and returns its result.
2481 function wfVariableSubstitution( $matches ) {
2482 global $wgCurParser;
2483 return $wgCurParser->variableSubstitution( $matches );
2487 * Return the total number of articles
2489 function wfNumberOfArticles() {
2490 global $wgNumberOfArticles;
# The value handed back is the global itself.
# NOTE(review): presumably the site stats are loaded first (see wfLoadSiteStats) - confirm.
2493 return $wgNumberOfArticles;
2497 * Get various statistics from the database
2500 function wfLoadSiteStats() {
2501 global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
2502 $fname = 'wfLoadSiteStats';
# Any value other than -1 in $wgNumberOfArticles short-circuits the call,
# so the database is queried at most once while that global stays set.
2504 if ( -1 != $wgNumberOfArticles ) return;
# Fetch the view/edit/article counters from the site_stats row with ss_row_id = 1,
# using the DB_SLAVE connection.
2505 $dbr =& wfGetDB( DB_SLAVE
);
2506 $s = $dbr->getArray( 'site_stats',
2507 array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
2508 array( 'ss_row_id' => 1 ), $fname
# A strict false from getArray() is handled separately from a returned row.
# NOTE(review): presumably the function bails out without touching the globals - confirm.
2511 if ( $s === false ) {
# Publish the fetched counters through the three globals
2514 $wgTotalViews = $s->ss_total_views
;
2515 $wgTotalEdits = $s->ss_total_edits
;
2516 $wgNumberOfArticles = $s->ss_good_articles
;
2520 function wfEscapeHTMLTagsOnly( $in ) {
2522 array( '"', '>', '<' ),
2523 array( '"', '>', '<' ),