make section headers in templates have edit links that point to the template.
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 /**
6 * File for Parser and related classes
7 *
8 * @package MediaWiki
9 * @version $Id$
10 */
11
12 /**
13 * Variable substitution O(N^2) attack
14 *
15 * Without countermeasures, it would be possible to attack the parser by saving
16 * a page filled with a large number of inclusions of large pages. The size of
17 * the generated page would be proportional to the square of the input size.
18 * Hence, we limit the number of inclusions of any given page, thus bringing any
19 * attack back to O(N).
20 */
# Limits that keep template inclusion linear in the input size.
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# prefix for escaping, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29');

# Constants needed for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Including space
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Capture groups: 1 = full URL, 2 = protocol, 3 = link text
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# The protocol alternation is wrapped in a non-capturing group so that the
# colon is required after every protocol, not only after the last one.
# Capture group numbering is unchanged.
define( 'EXT_IMAGE_REGEX',
	'/^((?:'.HTTP_PROTOCOLS.'):)'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);

$wgCurrentSectionNumber = 0; # XXX
56
57 /**
58 * PHP Parser
59 *
60 * Processes wiki markup
61 *
62 * <pre>
63 * There are three main entry points into the Parser class:
64 * parse()
65 * produces HTML output
66 * preSaveTransform().
67 * produces altered wiki markup.
68 * transformMsg()
69 * performs brace substitution on MediaWiki messages
70 *
71 * Globals used:
72 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
73 *
74 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
75 *
76 * settings:
77 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
78 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
79 * $wgLocaltimezone
80 *
81 * * only within ParserOptions
82 * </pre>
83 *
84 * @package MediaWiki
85 */
86 class Parser
87 {
88 /**#@+
89 * @access private
90 */
91 # Persistent:
92 var $mTagHooks;
93
94 # Cleared with clearState():
95 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
96 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
97
98 # Temporary:
99 var $mOptions, $mTitle, $mOutputType,
100 $mTemplates, // cache of already loaded templates, avoids
101 // multiple SQL queries for the same string
102 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
103 // in this path. Used for loop detection.
104
105 /**#@-*/
106
107 /**
108 * Constructor
109 *
110 * @access public
111 */
function Parser() {
	# These members are not touched by clearState(), so they are
	# initialised exactly once, here.
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();

	# Everything else is reset through the common code path.
	$this->clearState();
}
118
119 /**
120 * Clear Parser state
121 *
122 * @access private
123 */
function clearState() {
	# Fresh container for the result of the next parse
	$this->mOutput = new ParserOutput;

	# Counters and stacks
	$this->mAutonumber = 0;          # numbering of unlabelled external links
	$this->mIncludeCount = array();
	$this->mArgStack = array();
	$this->mStripState = array();    # filled by strip() / insertStripItem()

	# Flags and markers
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mLastSection = "";
}
135
136 /**
137 * First pass--just handle <nowiki> sections, pass the rest off
138 * to internalParse() which does all the real work.
139 *
140 * @access private
141 * @return ParserOutput a ParserOutput
142 */
/*
 * Convert wikitext to HTML and return it wrapped in the parser output object.
 *
 * Pipeline: strip() -> internalParse() -> unstrip() -> character clean-up
 * -> doBlockLevels() -> unstripNoWiki() -> optional tidy().
 *
 * $text       wikitext to render
 * $title      stored by reference in $this->mTitle
 * $options    stored in $this->mOptions
 * $linestart  passed through to internalParse() and doBlockLevels()
 * $clearState when true, clearState() is called before parsing
 *
 * Returns $this->mOutput after setText() has been called on it.
 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space.
			# The punctuation is matched by a lookahead, so there is only
			# one capture group to put back.
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# Tidy repairs <hr>, <br> and bare ampersands itself
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
198
199 /**
200 * Get a random string
201 *
202 * @access private
203 * @static
204 */
function getRandomString() {
	# Two independent 31-bit draws, each rendered as lower-case hex
	$upperBound = 0x7fffffff;
	$firstHalf = dechex( mt_rand( 0, $upperBound ) );
	$secondHalf = dechex( mt_rand( 0, $upperBound ) );
	return $firstHalf . $secondHalf;
}
208
209 /**
210 * Replaces all occurrences of <$tag>content</$tag> in the text
211 * with a random marker and returns the new text. the output parameter
212 * $content will be an associative array filled with data on the form
213 * $unique_marker => content.
214 *
215 * If $content is already set, the additional entries will be appended
216 * If $tag is set to STRIP_COMMENTS, the function will extract
217 * <!-- HTML comments -->
218 *
219 * @access private
220 * @static
221 */
/*
 * Replace every <$tag>...</$tag> section (or every HTML comment when $tag is
 * STRIP_COMMENTS) with a unique marker.
 *
 * $tag         tag name, or STRIP_COMMENTS
 * $text        text to scan
 * $content     in/out: receives marker => inner text; existing entries are kept
 * $uniq_prefix string prepended to every generated marker
 *
 * Returns the text with the sections replaced by their markers. A section
 * whose closing tag is missing extends to the end of the text.
 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters do not change while scanning, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openRegex = '/<!--/i';
		$closeRegex = '/-->/i';
	} else {
		# Quote the name so that it is matched literally
		$quotedTag = preg_quote( $tag, '/' );
		$openRegex = "/<\\s*$quotedTag\\s*>/i";
		$closeRegex = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			$text = '';
		} else {
			$q = preg_split( $closeRegex, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag there is nothing left to scan
			$text = ( count( $q ) < 2 ) ? '' : $q[1];
		}
	}
	return $stripped;
}
253
254 /**
255 * Strips and renders nowiki, pre, math, hiero
256 * If $render is set, performs necessary rendering operations on plugins
257 * Returns the text, and fills an array with data needed in unstrip()
258 * If the $state is already a valid strip state, it adds to the state
259 *
260 * @param bool $stripcomments when set, HTML comments <!-- like this -->
261 * will be stripped in addition to other tags. This is important
262 * for section editing, where these comments cause confusion when
263 * counting the sections in the wikisource
264 *
265 * @access private
266 */
/*
 * Pull html, nowiki, math, pre, (optionally) comment and extension-tag
 * sections out of $text, replacing each with a unique marker.
 *
 * When $this->mOutputType is OT_HTML the removed sections are stored in
 * rendered form; otherwise they are stored wrapped in their original tags.
 *
 * $text          text to process
 * $state         in/out strip state; new sections are merged into an existing
 *                state, or a new state is created when it is empty
 * $stripcomments when true, HTML comments are stripped as well
 *
 * Returns the text with the sections replaced by markers.
 */
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml, $wgWhitelistEdit;
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		if ( !$render ) {
			# When rendering, the content is kept raw and unchecked for
			# validity; otherwise the original tags are restored.
			foreach( $html_content as $marker => $content ) {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source with escaped, properly closed tags
				$math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				# call_user_func() also accepts array( object, 'method' ) callbacks
				$ext_content[$tag][$marker] = call_user_func( $callback, $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$builtin = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		# A state created by insertStripItem() may not contain every key,
		# so missing sections are created rather than added to.
		foreach ( $builtin as $key => $found ) {
			if ( isset( $state[$key] ) ) {
				$state[$key] = $state[$key] + $found;
			} else {
				$state[$key] = $found;
			}
		}
		foreach( $ext_content as $tag => $array ) {
			if ( array_key_exists( $tag, $state ) ) {
				$state[$tag] = $state[$tag] + $array;
			} else {
				$state[$tag] = $array;
			}
		}
	} else {
		$state = $builtin + $ext_content;
	}
	return $text;
}
373
374 /**
375 * restores pre, math, and heiro removed by strip()
376 *
377 * always call unstripNoWiki() after this one
378 * @access private
379 */
/*
 * Put back every stripped section except 'nowiki' and 'html', which are
 * restored separately by unstripNoWiki().
 *
 * $text  text containing strip markers
 * $state strip state: section name => array( marker => replacement )
 *
 * Returns the text with the markers expanded. A state that is not an array
 * leaves the text untouched.
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating over reversed copies (instead of end()/prev()) means a
	# replacement value of false no longer ends the loop early.
	foreach ( array_reverse( $state, true ) as $section => $contentDict ) {
		if ( $section === 'nowiki' || $section === 'html' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
393
394 /**
395 * always call this after unstrip() to preserve the order
396 *
397 * @access private
398 */
/*
 * Put back the 'nowiki' sections and, when $wgRawHtml is set, the 'html'
 * sections of the strip state.
 *
 * $text  text containing strip markers
 * $state strip state: section name => array( marker => replacement )
 *
 * Returns the text with the markers expanded. Sections missing from the
 * state are skipped.
 */
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	$sections = array( 'nowiki' );
	if ( $wgRawHtml ) {
		$sections[] = 'html';
	}

	foreach ( $sections as $section ) {
		if ( empty( $state[$section] ) || !is_array( $state[$section] ) ) {
			continue;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state[$section], true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
414
415 /**
416 * Add an item to the strip state
417 * Returns the unique tag which must be inserted into the stripped text
418 * The tag will be replaced with the original text in unstrip()
419 *
420 * @access private
421 */
/*
 * Store $text in the strip state under a freshly generated marker.
 *
 * $text  text to hide from further processing
 * $state in/out strip state; created with empty sections when empty
 *
 * Returns the marker that stands for $text; it is stored in the 'item'
 * section of the state.
 */
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same set of sections that strip() creates, so that strip() can
		# later merge into this state without hitting a missing key.
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
435
436 /**
437 * Return allowed HTML attributes
438 *
439 * @access private
440 */
function getHTMLattrs () {
	# Allowed attributes--no scripting, etc.
	# The groups are concatenated in order; array_merge() renumbers the
	# entries so the result is one flat list.
	$general = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );
	$perElement = array(
		'clear',                 # BR
		'noshade',               # HR
		'cite',                  # BLOCKQUOTE, Q
		'size', 'face', 'color'  # FONT
	);
	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);
	# For CSS
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $general, $perElement, $lists, $tables, $css );
}
456
457 /**
458 * Remove non approved attributes and javascript in css
459 *
460 * @access private
461 */
/*
 * Reduce an attribute string to the attributes returned by getHTMLattrs()
 * and drop everything if a style attribute looks like it carries script.
 *
 * $t raw attribute text of a tag
 *
 * Returns the cleaned, trimmed attribute string (possibly empty).
 */
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier so that attribute text
	# supplied by the user is never evaluated as PHP code.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t);

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

/*
 * preg_replace_callback() helper for fixTagAttributes(): keeps one
 * name[=value] match if the name is an approved attribute, drops it otherwise.
 *
 * $m match array: [1] attribute name, [3] value (absent when there is none)
 */
function fixTagAttributesCallback( $m ) {
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	$value = isset( $m[3] ) ? $m[3] : '';
	if ( $value !== '' ) {
		return $m[1] . '=' . $value;
	}
	return $m[1];
}
486
487 /**
488 * interface with html tidy, used if $wgUseTidy = true
489 *
490 * @access private
491 */
/*
 * Pipe $text, wrapped in a minimal XHTML document, through the external
 * tidy binary and return what it writes to standard output.
 *
 * $text HTML fragment to clean
 *
 * Returns tidy's output, or the original text followed by an HTML comment
 * when tidy produced nothing for a non-empty input.
 */
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	# Work on a copy: appending to the global would pile up one more
	# encoding flag on every call.
	$opts = $wgTidyOpts;
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= $sameEncoding ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$opts .= $sameEncoding ? ' -utf8' : ' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$cleansource = '';
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),               # stdin: the wrapped document
		1 => array('pipe', 'w'),               # stdout: cleaned document
		2 => array('file', '/dev/null', 'a')   # stderr is discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
538
539 /**
540 * parse the wiki syntax used to render tables
541 *
542 * @access private
543 */
/*
 * Translate wiki table markup ({| ... |}, |-, |, !, |+) into HTML tables,
 * one input line at a time.
 *
 * Four parallel stacks, one entry per currently open (nested) table, track
 * whether a cell and a row are open, which cell tag is open, and the
 * attributes waiting for the next row.
 *
 * $t text to process
 *
 * Returns the text with table markup replaced by HTML.
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		$first2 = substr ( $x , 0 , 2 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Table start, optionally indented with leading colons
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == $first2 ) {
			# Table end: close whatever cell and row are still open
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == $first2 ) { # Allows for |---------------
			# Row separator; what follows the dashes is kept as the
			# attributes of the next row
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == $first2 ) { # Caption
			if ( '|+' == $first2 ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			# In header lines, !! separates cells just like ||
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open a row first if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, then remember the new cell's tag
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that was actually opened
		# (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
640
641 /**
642 * Helper function for parse() that transforms wiki markup into
643 * HTML. Only called for $mOutputType == OT_HTML.
644 *
645 * @access private
646 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgLang, $wgDateFormatter;

	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# Sanitise markup and expand variables before anything else
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	$text = $wgLang->convert($text);

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );

	# External links are handled before internal ones
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );

	# Two passes: the second one replaces links and images inside
	# captions of images
	for ( $pass = 0; $pass < 2; $pass++ ) {
		$text = $this->replaceInternalLinks ( $text );
	}

	$text = $this->unstrip( $text, $this->mStripState );
	$text = $this->unstripNoWiki( $text, $this->mStripState );

	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	wfProfileOut( $fname );
	return $text;
}
683
684 /**
685 * Replace special strings like "ISBN xxx" and "RFC xxx" with
686 * magic external links.
687 *
688 * @access private
689 */
function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	# ISBN first, then the optional GEO pass, then RFC
	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		# Only when the setting exists and is truthy
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
699
700 /**
701 * Parse ^^ tokens and return html
702 *
703 * @access private
704 */
/*
 * Turn ^^text^^ into small superscript markup.
 *
 * $text text to process
 *
 * Returns the text with every ^^...^^ pair replaced.
 */
function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	# Non-greedy, so that several exponents on one line are converted
	# separately instead of being merged into a single span.
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
712
713 /**
714 * Parse headers and return html
715 *
716 * @access private
717 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	# Deepest level first, so that "======" is not consumed by the
	# patterns for the shorter markers.
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = '/^' . $marks . '(.+)' . $marks . '(\\s|$)/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>\\2';
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}

	wfProfileOut( $fname );
	return $text;
}
729
730 /**
731 * Replace single quotes with HTML markup
732 * @access private
733 * @return string the altered text
734 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	# Apostrophe markup never spans lines, so each line is converted
	# on its own and the lines are joined again afterwards.
	$lines = explode( "\n", $text );
	foreach ( array_keys( $lines ) as $lineNo ) {
		$lines[$lineNo] = $this->doQuotes ( $lines[$lineNo] );
	}
	$outtext = implode( "\n", $lines );

	wfProfileOut( $fname );
	return $outtext;
}
747
748 /**
749 * Helper function for doAllQuotes()
750 * @access private
751 */
function doQuotes( $text ) {
	# Odd indexes hold the apostrophe runs, even indexes the text between them
	$pieces = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$total = count( $pieces );
	if ( $total == 1 ) {
		# No apostrophe run in this line
		return $text;
	}

	# Pass 1: normalise every run to 2, 3 or 5 apostrophes, moving surplus
	# apostrophes into the preceding text, and count the mark-ups.
	$boldCount = 0;
	$italicCount = 0;
	for ( $idx = 1; $idx < $total; $idx += 2 ) {
		$len = strlen( $pieces[$idx] );
		if ( $len == 4 ) {
			# Four: one literal apostrophe followed by bold
			$pieces[$idx - 1] .= "'";
			$pieces[$idx] = "'''";
			$len = 3;
		} elseif ( $len > 5 ) {
			# More than five: everything but the last five is text
			$pieces[$idx - 1] .= str_repeat( "'", $len - 5 );
			$pieces[$idx] = "'''''";
			$len = 5;
		}
		switch ( $len ) {
			case 2:
				$italicCount++;
				break;
			case 3:
				$boldCount++;
				break;
			case 5:
				$italicCount++;
				$boldCount++;
				break;
		}
	}

	# Pass 2: with an odd number of both, one bold run was probably meant
	# as an apostrophe followed by italics. Preference order: a run after a
	# single-letter word, then after a multi-letter word, then after a space.
	if ( ( $boldCount % 2 == 1 ) && ( $italicCount % 2 == 1 ) ) {
		$afterSingleLetter = -1;
		$afterMultiLetter = -1;
		$afterSpace = -1;
		for ( $idx = 1; $idx < $total; $idx += 2 ) {
			if ( strlen( $pieces[$idx] ) != 3 ) {
				continue;
			}
			$lastChar = substr( $pieces[$idx - 1], -1 );
			$secondLastChar = substr( $pieces[$idx - 1], -2, 1 );
			if ( $lastChar == ' ' ) {
				if ( $afterSpace == -1 ) $afterSpace = $idx;
			} elseif ( $secondLastChar == ' ' ) {
				if ( $afterSingleLetter == -1 ) $afterSingleLetter = $idx;
			} else {
				if ( $afterMultiLetter == -1 ) $afterMultiLetter = $idx;
			}
		}

		# All three can be -1, e.g. when the line only holds one
		# five-apostrophe run; then nothing is changed.
		$target = -1;
		if ( $afterSingleLetter > -1 ) {
			$target = $afterSingleLetter;
		} elseif ( $afterMultiLetter > -1 ) {
			$target = $afterMultiLetter;
		} elseif ( $afterSpace > -1 ) {
			$target = $afterSpace;
		}
		if ( $target > -1 ) {
			$pieces[$target] = "''";
			$pieces[$target - 1] .= "'";
		}
	}

	# Pass 3: emit HTML. $state names the open tags in opening order
	# ('', 'b', 'i', 'bi', 'ib'); 'both' means a five-apostrophe run was
	# seen and the nesting order is not known yet, so text is buffered.
	$output = '';
	$buffer = '';
	$state = '';
	for ( $idx = 0; $idx < $total; $idx++ ) {
		$piece = $pieces[$idx];
		if ( $idx % 2 == 0 ) {
			if ( $state == 'both' ) {
				$buffer .= $piece;
			} else {
				$output .= $piece;
			}
			continue;
		}

		switch ( strlen( $piece ) ) {
			case 2:
				switch ( $state ) {
					case 'i':
						$output .= '</i>'; $state = '';
						break;
					case 'bi':
						$output .= '</i>'; $state = 'b';
						break;
					case 'ib':
						$output .= '</b></i><b>'; $state = 'b';
						break;
					case 'both':
						$output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
						break;
					default: # $state can be 'b' or ''
						$output .= '<i>'; $state .= 'i';
				}
				break;
			case 3:
				switch ( $state ) {
					case 'b':
						$output .= '</b>'; $state = '';
						break;
					case 'bi':
						$output .= '</i></b><i>'; $state = 'i';
						break;
					case 'ib':
						$output .= '</b>'; $state = 'i';
						break;
					case 'both':
						$output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
						break;
					default: # $state can be 'i' or ''
						$output .= '<b>'; $state .= 'b';
				}
				break;
			case 5:
				switch ( $state ) {
					case 'b':
						$output .= '</b><i>'; $state = 'i';
						break;
					case 'i':
						$output .= '</i><b>'; $state = 'b';
						break;
					case 'bi':
						$output .= '</i></b>'; $state = '';
						break;
					case 'ib':
						$output .= '</b></i>'; $state = '';
						break;
					case 'both':
						$output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
						break;
					default: # ($state == '')
						$buffer = ''; $state = 'both';
				}
				break;
		}
	}

	# Close whatever is still open, innermost tag first.
	switch ( $state ) {
		case 'b':
			$output .= '</b>';
			break;
		case 'i':
			$output .= '</i>';
			break;
		case 'ib':
			$output .= '</b></i>';
			break;
		case 'bi':
			$output .= '</i></b>';
			break;
		case 'both':
			$output .= '<b><i>'.$buffer.'</i></b>';
			break;
	}
	return $output;
}
912
913 /**
914 * Replace external links
915 *
916 * Note: we have to do external links before the internal ones,
917 * and otherwise take great care in the order of things here, so
918 * that we don't end up interpreting some URLs twice.
919 *
920 * @access private
921 */
/*
 * Convert bracketed external links ([url] and [url text]) into <a> tags and
 * hand the text between them to replaceFreeExternalLinks().
 *
 * $text text to process
 *
 * Returns the text with external links replaced by HTML.
 */
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsg('linktrail');
	# Each match contributes four entries: URL, protocol, link text, and
	# the plain text that follows the link
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		# Escaped exactly once. &amp; from obsolete syntax is folded back
		# to & first (as replaceFreeExternalLinks() does) so that it is not
		# escaped a second time.
		$encUrl = htmlspecialchars( str_replace( '&amp;', '&', $url ) );

		# Bit in parentheses showing the URL for the printable version.
		# The pattern is delimited by "!", so that is the character
		# preg_quote() has to escape.
		if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '!' ) . "/?$!", $url ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $text );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like &ouml; aren't valid in URLs anyway
		# This was changed in August 2004
		# (the encoded form also keeps a quote character in the URL from
		# ending the href attribute early)
		$s .= "<a href=\"{$encUrl}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
995
996 /**
997 * Replace anything that looks like a URL with a link
998 * @access private
999 */
/*
 * Turn bare URLs (protocol followed by URL characters) into links, or into
 * images when maybeMakeImageLink() accepts them.
 *
 * $text text to process
 *
 * Returns the text with free URLs replaced by HTML.
 */
function replaceFreeExternalLinks( $text ) {
	# \b keeps a protocol name that is merely the tail of a longer word
	# (e.g. "xhttp:") from being treated as the start of a URL.
	$bits = preg_split( '/\b((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	# After the leading text, entries alternate: protocol, remainder
	$s = array_shift( $bits );
	$i = 0;

	$sk =& $this->mOptions->getSkin();

	while ( $i < count( $bits ) ){
		$protocol = $bits[$i++];
		$remainder = $bits[$i++];

		if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Found some characters after the protocol that look promising
			$url = $protocol . $m[1];
			$trail = $m[2];

			# Move trailing punctuation to $trail
			$sep = ',;\.:!?';
			# If there is no left bracket, then consider right brackets fair game too
			if ( strpos( $url, '(' ) === false ) {
				$sep .= ')';
			}

			# Count separator characters at the end of the URL
			$numSepChars = strspn( strrev( $url ), $sep );
			if ( $numSepChars ) {
				$trail = substr( $url, -$numSepChars ) . $trail;
				$url = substr( $url, 0, -$numSepChars );
			}

			# Replace &amp; from obsolete syntax with &
			$url = str_replace( '&amp;', '&', $url );

			# Is this an external image?
			$text = $this->maybeMakeImageLink( $url );
			if ( $text === false ) {
				# Not an image, make a link
				$text = $sk->makeExternalLink( $url, $url );
			}
			$s .= $text . $trail;
		} else {
			# Protocol not followed by URL characters: leave as it was
			$s .= $protocol . $remainder;
		}
	}
	return $s;
}
1045
/**
 * Render a URL as an inline image if external images are allowed by the
 * parser options and the URL matches EXT_IMAGE_REGEX.
 *
 * @param string $url Candidate image URL
 * @return mixed Image markup from the skin, or false if no image was made
 * @access private
 */
function maybeMakeImageLink( $url ) {
    $sk =& $this->mOptions->getSkin();

    if ( !$this->mOptions->getAllowExternalImages() ) {
        return false;
    }
    if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
        return false;
    }

    # Image found
    return $sk->makeImage( htmlspecialchars( $url ) );
}
1061
1062 /**
1063 * Process [[ ]] wikilinks
1064 *
 * The text is split on '[[' and every following chunk is matched against
 * "target|text]]trail". Depending on the target the chunk becomes a
 * language link, an image, a category link, a self link, a media link,
 * a special-page link or an ordinary page link. Chunks that do not parse
 * as a link are re-emitted unchanged, with their '[[' restored.
 *
 * @param string $s Wikitext to process
 * @return string Text with the internal links replaced by skin output
1065 * @access private
1066 */
1067 function replaceInternalLinks( $s ) {
1068 global $wgLang, $wgLinkCache;
1069 global $wgNamespacesWithSubpages;
1070 static $fname = 'Parser::replaceInternalLinks' ;
1071 wfProfileIn( $fname );
1072
1073 wfProfileIn( $fname.'-setup' );
1074 static $tc = FALSE;
1075 # the % is needed to support urlencoded titles as well
1076 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1077 $sk =& $this->mOptions->getSkin();
1078
1079 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1080
# Split on '[['. A space is prepended before splitting and removed again
# from the first chunk, which is the text before the first link.
1081 $a = explode( '[[', ' ' . $s );
1082 $s = array_shift( $a );
1083 $s = substr( $s, 1 );
1084
1085 # Match a link having the form [[namespace:link|alternate]]trail
1086 static $e1 = FALSE;
1087 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1088 # Match the end of a line for a word that's not followed by whitespace,
1089 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1090 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1091
1092 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1093 # Special and Media are pseudo-namespaces; no pages actually exist in them
1094
1095 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1096
# With the link prefix extension enabled, a word directly in front of the
# first link is cut off the leading text and kept as that link's prefix.
1097 if ( $useLinkPrefixExtension ) {
1098 if ( preg_match( $e2, $s, $m ) ) {
1099 $first_prefix = $m[2];
1100 $s = $m[1];
1101 } else {
1102 $first_prefix = false;
1103 }
1104 } else {
1105 $prefix = '';
1106 }
1107
1108 wfProfileOut( $fname.'-setup' );
1109
1110 # Process each chunk that followed a '[['
1111 foreach ( $a as $line ) {
1112 wfProfileIn( $fname.'-prefixhandling' );
1113 if ( $useLinkPrefixExtension ) {
# The prefix for this link is the trailing word of the output built so far
1114 if ( preg_match( $e2, $s, $m ) ) {
1115 $prefix = $m[2];
1116 $s = $m[1];
1117 } else {
1118 $prefix='';
1119 }
1120 # first link
1121 if($first_prefix) {
1122 $prefix = $first_prefix;
1123 $first_prefix = false;
1124 }
1125 }
1126 wfProfileOut( $fname.'-prefixhandling' );
1127
# $m[1] = link target, $m[2] = optional link text, $m[3] = text after ']]'
1128 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1129 $text = $m[2];
1130 # fix up urlencoded title texts
1131 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1132 $trail = $m[3];
1133 } else { # Invalid form; output directly
1134 $s .= $prefix . '[[' . $line ;
1135 continue;
1136 }
1137
1138 # Valid link forms:
1139 # Foobar -- normal
1140 # :Foobar -- override special treatment of prefix (images, language links)
1141 # /Foobar -- convert to CurrentPage/Foobar
1142 # /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1143
1144 # Look at the first character
1145 $c = substr($m[1],0,1);
# $noforce is false when the target starts with ':'
1146 $noforce = ($c != ':');
1147
1148 # subpage
1149 if( $c == '/' ) {
1150 # / at end means we don't want the slash to be shown
1151 if(substr($m[1],-1,1)=='/') {
1152 $m[1]=substr($m[1],1,strlen($m[1])-2);
1153 $noslash=$m[1];
1154 } else {
1155 $noslash=substr($m[1],1);
1156 }
1157
1158 # Some namespaces don't allow subpages
1159 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1160 # subpages allowed here
1161 $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1162 if( '' == $text ) {
1163 $text= $m[1];
1164 } # this might be changed for ugliness reasons
1165 } else {
1166 # no subpage allowed, use standard link
1167 $link = $noslash;
1168 }
1169
1170 } elseif( $noforce ) { # no subpage
1171 $link = $m[1];
1172 } else {
1173 # We don't want to keep the first character
1174 $link = substr( $m[1], 1 );
1175 }
1176
# Remember whether link text was given; the category sort key depends on it
1177 $wasblank = ( '' == $text );
1178 if( $wasblank ) $text = $link;
1179
1180 $nt = Title::newFromText( $link );
1181 if( !$nt ) {
# Not a valid title: emit the chunk unchanged
1182 $s .= $prefix . '[[' . $line;
1183 continue;
1184 }
1185
1186 $ns = $nt->getNamespace();
1187 $iw = $nt->getInterWiki();
1188
1189 # Link not escaped by : , create the various objects
1190 if( $noforce ) {
1191
1192 # Interwikis
# A language link is recorded in the parser output instead of being
# rendered inline; the surrounding prefix/trail is kept only if it is
# not pure whitespace.
1193 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1194 array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1195 $tmp = $prefix . $trail ;
1196 $s .= (trim($tmp) == '')? '': $tmp;
1197 continue;
1198 }
1199
1200 if ( $ns == NS_IMAGE ) {
1201 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1202 $wgLinkCache->addImageLinkObj( $nt );
1203 continue;
1204 }
1205
# Category links are collected in mOutput->mCategoryLinks; nothing but
# the prefix and trail is emitted at the link's position.
1206 if ( $ns == NS_CATEGORY ) {
1207 $t = $nt->getText() ;
1208 $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1209
1210 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
# Switch off the skin's post-parse link colouring for this one link and
# restore the previous setting afterwards
1211 $pPLC=$sk->postParseLinkColour();
1212 $sk->postParseLinkColour( false );
1213 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1214 $sk->postParseLinkColour( $pPLC );
1215 $wgLinkCache->resume();
1216
# Sort key: the explicit link text if one was given, otherwise the title
# of the page being parsed (without namespace for category pages)
1217 if ( $wasblank ) {
1218 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1219 $sortkey = $this->mTitle->getText();
1220 } else {
1221 $sortkey = $this->mTitle->getPrefixedText();
1222 }
1223 } else {
1224 $sortkey = $text;
1225 }
1226 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1227 $this->mOutput->mCategoryLinks[] = $t ;
1228 $s .= $prefix . $trail ;
1229 continue;
1230 }
1231 }
1232
1233 if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
1234 ( strpos( $link, '#' ) === FALSE ) ) {
1235 # Self-links are handled specially; generally de-link and change to bold.
1236 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1237 continue;
1238 }
1239
1240 if( $ns == NS_MEDIA ) {
1241 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1242 $wgLinkCache->addImageLinkObj( $nt );
1243 continue;
1244 } elseif( $ns == NS_SPECIAL ) {
1245 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1246 continue;
1247 }
# Ordinary page link; prefix and trail are passed to the skin
1248 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1249 }
1250 wfProfileOut( $fname );
1251 return $s;
1252 }
1253
1254 /**#@+
1255 * Used by doBlockLevels()
1256 * @access private
1257 */
# closeParagraph() returns the closing tag (plus a newline) for the
# section named in mLastSection, or an empty string if none is open.
# It always clears mLastSection and the mInPre flag.
#
/* private */ function closeParagraph() {
    $closing = ( '' != $this->mLastSection )
        ? '</' . $this->mLastSection . ">\n"
        : '';

    $this->mInPre = false;
    $this->mLastSection = '';
    return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters in which $st1 and
# $st2 agree. Returns 0 if either string is empty.
#
# Characters are read with the [] offset syntax: the {} form is a parse
# error on PHP 8, while [] works on every PHP version.
#
/* private */ function getCommon( $st1, $st2 ) {
    # Only the length of the shorter string can possibly match
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) { break; }
    }
    # $i is the index of the first mismatch, or $shorter if there was none
    return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() closes any open paragraph and then opens the list and its
# first item. For ';' it also sets mDTopen. An unknown character yields
# only an error comment.
#
/* private */ function openList( $char ) {
    $html = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $html .= '<ul><li>';
            break;
        case '#':
            $html .= '<ol><li>';
            break;
        case ':':
            $html .= '<dl><dd>';
            break;
        case ';':
            $html .= '<dl><dt>';
            $this->mDTopen = true;
            break;
        default:
            # Replaces (does not extend) whatever closeParagraph() returned
            $html = '<!-- ERR 1 -->';
    }

    return $html;
}
1297
# nextItem() closes the current list item and opens the next one of the
# kind selected by $char. For definition lists the closing tag depends on
# whether a <dt> is currently open (mDTopen), and mDTopen is updated to
# reflect the item that was just opened.
#
/* private */ function nextItem( $char ) {
    switch ( $char ) {
        case '*':
        case '#':
            return '</li><li>';

        case ':':
        case ';':
            $close = $this->mDTopen ? '</dt>' : '</dd>';
            $opensTerm = ( ';' == $char );
            $this->mDTopen = $opensTerm;
            return $close . ( $opensTerm ? '<dt>' : '<dd>' );
    }
    return '<!-- ERR 2 -->';
}
1313
# closeList() closes the last item and the list selected by $char and
# appends a newline. For ':' an open <dt> (mDTopen) is closed instead of
# a <dd>, and the flag is reset. Any other character, including ';',
# yields an error comment without a newline.
#
/* private */ function closeList( $char ) {
    switch ( $char ) {
        case '*':
            $closing = '</li></ul>';
            break;
        case '#':
            $closing = '</li></ol>';
            break;
        case ':':
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $closing = '</dt></dl>';
            } else {
                $closing = '</dd></dl>';
            }
            break;
        default:
            return '<!-- ERR 3 -->';
    }
    return $closing."\n";
}
1328 /**#@-*/
1329
/**
 * Make lists from lines starting with ':', '*', '#', etc., and wrap the
 * remaining lines in <p> or <pre> sections.
 *
 * The text is processed line by line. State that has to survive from one
 * line to the next is kept in $lastPrefix (list nesting of the previous
 * line), $paragraphStack (a paragraph tag whose output is deferred),
 * $inBlockElem, and the members mLastSection, mInPre and mDTopen.
 *
 * String offsets use the [] syntax; the {} form is a parse error on
 * PHP 8.
 *
 * @param string $text      Text to process
 * @param bool   $linestart If false, the first line is copied to the
 *                          output without any block-level processing
 * @access private
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
    $fname = 'Parser::doBlockLevels';
    wfProfileIn( $fname );

    # Parsing through the text line by line.  The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 is the prefix with every ';' turned into ':'. It is
            # what gets stored in $lastPrefix, so a ';' line and a ':'
            # line at the same depth compare as the same list.
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( substr( $pref, -1 ) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ':' );
                    $t = $match[2];
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close the levels of the previous line that this line does not share
            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            # Nothing new to open: continue with a new item in the shared list
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            # Open the levels that are new on this line
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ':' );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
            $closematch = preg_match(
                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                if($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() rather than an offset read: $t may be empty here
                if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                    $t = substr( $t, 1 );
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            # Second blank line in a row: emit the deferred
                            # tag followed by an explicit line break
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            # First blank line: defer the paragraph tag until
                            # the next line shows what follows
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # While a paragraph tag is deferred the (blank) line itself is not output
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close whatever lists are still open after the last line
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut( $fname );
    return $output;
}
1493
/**
 * Return the value of a magic variable (like PAGENAME).
 *
 * @param mixed $index One of the MAG_* variable identifiers
 * @return mixed The variable's current value, or NULL if $index is not
 *               one of the variables handled here
 * @access private
 */
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        # Date and time
        case MAG_CURRENTMONTH:
            $value = $wgLang->formatNum( date( 'm' ) );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date('n') );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date('n') );
            break;
        case MAG_CURRENTDAY:
            $value = $wgLang->formatNum( date('j') );
            break;
        case MAG_CURRENTDAYNAME:
            # date('w') counts from 0; the language object is given 1..7
            $value = $wgLang->getWeekdayName( date('w')+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = $wgLang->formatNum( date( 'Y' ) );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;

        # The page being parsed
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_PAGENAMEE:
            $value = $this->mTitle->getPartialURL();
            break;
        case MAG_NAMESPACE:
            # Namespace name as given by the content language object,
            # not Namespace::getCanonicalName()
            $value = $wgLang->getNsText($this->mTitle->getNamespace());
            break;

        # The site
        case MAG_NUMBEROFARTICLES:
            $value = $wgLang->formatNum( wfNumberOfArticles() );
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1534
/**
 * Initialise the magic variables (like CURRENTMONTHNAME).
 *
 * Resets mVariables and then lets the magic word of every ID listed in
 * $wgVariableIDs add itself to it, together with its current value.
 *
 * @access private
 */
function initialiseVariables() {
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1548
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  OT_WIKI: only {{subst:}} templates
 *  OT_MSG: only magic variables
 *  OT_HTML: all templates and magic variables
 *
 * Text longer than MAX_INCLUDE_SIZE bytes is returned unchanged.
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string The transformed text
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
    global $wgLang, $wgScript, $wgArticlePath;

    # Prevent too big inclusions
    if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
        return $text;
    }

    $fname = 'Parser::replaceVariables';
    wfProfileIn( $fname );

    $titleChars = Title::legalChars();

    # This function is called recursively; the arguments of the current
    # call go on top of a stack and are removed again before returning.
    $this->mArgStack[] = $args;

    # The substitution callbacks named below are plain functions; this
    # parser is published in a global for them.
    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    # Argument substitution ({{{name}}}), HTML output only
    if ( $this->mOutputType == OT_HTML ) {
        $text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
    }

    # Template substitution ({{name|args}})
    $regex = '/{{(['.$titleChars.']*)(\\|.*?|)}}/s';
    $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1594
# Split the argument part of a template call into its arguments.
#
# $argsString is either empty or starts with the '|' that separates the
# template name from the first argument. The rest is split on '|', except
# that a piece whose number of '[[' differs from its number of ']]' is
# glued to the following piece (with the '|' restored): that '|' belongs
# to link syntax, not to the template parameter syntax.
#
# Returns a list of argument strings, indexed from 0.
function getTemplateArgs( $argsString ) {
    if ( $argsString === '' ) {
        return array();
    }

    $pieces = explode( '|', substr( $argsString, 1 ) );

    $result = array();
    $pending = null;    # unbalanced text still waiting for more pieces
    foreach ( $pieces as $piece ) {
        $candidate = is_null( $pending ) ? $piece : $pending . '|' . $piece;

        if ( substr_count( $candidate, '[[' ) != substr_count( $candidate, ']]' ) ) {
            # Still unbalanced: keep collecting
            $pending = $candidate;
        } else {
            $result[] = $candidate;
            $pending = null;
        }
    }

    # The input ran out before the brackets balanced; keep what was collected
    if ( !is_null( $pending ) ) {
        $result[] = $pending;
    }

    return $result;
}
1619
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * The name part is checked, in this order, for: a nested {{{ }}}
 * argument, subst:, msgnw:/msg:/int:, ns:, localurl:/localurle:, the
 * magic variables, grammar:, the template cache, and finally a page
 * loaded from the database.
 *
 * For every output type except OT_WIKI, ==section headers== in the
 * result get a __MWTEMPLATESECTION=<name>&<number>__ marker appended,
 * where <name> is the base64-encoded prefixed DB key of the template
 * title. When no title is known (magic variables, int:, ns:, grammar:,
 * template cache hits) the encoded name is empty.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete match, including the braces
 *  $matches[1]: the title, i.e. the part before the |
 *  $matches[2]: the parameters (including a leading |), if any
 * @return string the text of the template
 * @access private
 */
function braceSubstitution( $matches ) {
    global $wgLinkCache, $wgLang;
    $fname = 'Parser::braceSubstitution';
    $found = false;     # a replacement text has been determined
    $nowiki = false;    # msgnw: was used, escape the text instead of parsing it
    $noparse = false;   # do not parse the replacement text recursively

    $title = NULL;

    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1

    $part1 = $matches[1];
    # If the second subpattern matched anything, it will start with |

    $args = $this->getTemplateArgs($matches[2]);
    $argc = count( $args );

    # {{{}}}
    if ( strpos( $matches[0], '{{{' ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse= true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is handed to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # GRAMMAR
    if ( !$found && $argc == 1 ) {
        $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
        if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
            $text = $wgLang->convertGrammar( $args[0], $part1 );
            $found = true;
        }
    }

    # Template table test

    # Did we encounter this template already? If yes, it is in the cache
    # and we need to check for loops.
    if ( isset( $this->mTemplates[$part1] ) ) {
        # Infinite loop test
        if ( isset( $this->mTemplatePath[$part1] ) ) {
            $noparse = true;
            $found = true;
        }
        # set $text to cached message.
        $text = $this->mTemplates[$part1];
        $found = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                # Still within the inclusion limit: fetch the page text
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = '[['.$title->getPrefixedText().']]';
                $found = true;
            }

            # Template cache array insertion
            $this->mTemplates[$part1] = $text;
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
        # Clean up argument array: "name=value" arguments are stored under
        # their trimmed name, all others are numbered starting from 1
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, '=' );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                if ( $value === false ) {
                    $value = '';
                }
                if ( $name !== false ) {
                    $assocArgs[$name] = $value;
                }
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Add a new element to the template recursion path
        $this->mTemplatePath[$part1] = 1;

        $text = $this->strip( $text, $this->mStripState );
        $text = $this->removeHTMLtags( $text );
        $text = $this->replaceVariables( $text, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    # Empties the template path
    $this->mTemplatePath = array();

    if ( !$found ) {
        return $matches[0];
    } else {
        # replace ==section headers==
        # XXX this needs to go away once we have a better parser.
        if ( $this->mOutputType != OT_WIKI ) {
            # $title is still NULL unless the text came from localurl(e):
            # or was loaded from the database on this call; calling a
            # method on it unconditionally would be a fatal error.
            $encodedname = base64_encode( is_null( $title ) ? '' : $title->getPrefixedDBkey() );
            # NOTE(review): create_function() no longer exists on PHP 8;
            # it is kept because the file otherwise sticks to PHP 4 syntax.
            for ( $i = 1; $i <= 6; ++$i ) {
                $h = substr( '======', 0, $i );
                $text = preg_replace_callback( "/^({$h})([^=].*){$h}\\s?$/m",
                    create_function('$matches',
                        'return "${matches[1]}$matches[2] __MWTEMPLATESECTION='.$encodedname.
                        '&" . wfGetSectionNumber() . "__${matches[1]}";'
                    ), $text);
            }
        }
        return $text;
    }
}
1866
/**
 * Triple brace replacement -- used for template arguments.
 *
 * The argument name is looked up in the argument set on top of
 * mArgStack. A known argument is stripped, cleaned of disallowed HTML and
 * expanded (with no arguments of its own); an unknown one is returned
 * exactly as it appeared in the text.
 *
 * @param array $matches $matches[0]: the complete {{{...}}} match,
 *                       $matches[1]: the argument name
 * @return string Replacement text
 * @access private
 */
function argSubstitution( $matches ) {
    $name = trim( $matches[1] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $name, $currentArgs ) ) {
        return $matches[0];
    }

    $value = $this->strip( $currentArgs[$name], $this->mStripState );
    $value = $this->removeHTMLtags( $value );
    return $this->replaceVariables( $value, array() );
}
1884
/**
 * Count one more inclusion of the given entity and report whether it is
 * still allowed, i.e. whether the entity has now been included at most
 * MAX_INCLUDE_REPEAT times.
 *
 * @param string $dbk Key identifying the included entity
 * @return bool True if the function is allowed to include this entity
 * @access private
 */
function incrementIncludeCount( $dbk ) {
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }

    $this->mIncludeCount[$dbk]++;

    return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1899
1900
1901 /**
1902 * Cleans up HTML, removes dangerous tags and attributes, and
1903 * removes HTML comments
 *
 * HTML comments are removed first. The text is then split on '<' and
 * every piece is examined: a piece that starts with an allowed tag is
 * kept, with its attributes passed through fixTagAttributes(); any other
 * piece has its '<' and '>' turned into &lt; / &gt;. When $wgUserHtml is
 * off no tag is allowed. Unless $wgUseTidy is set, tag nesting is also
 * checked and tags left open at the end are closed.
 *
 * @param string $text Text to clean
 * @return string The cleaned text
1904 * @access private
1905 */
1906 function removeHTMLtags( $text ) {
1907 global $wgUseTidy, $wgUserHtml;
1908 $fname = 'Parser::removeHTMLtags';
1909 wfProfileIn( $fname );
1910
1911 if( $wgUserHtml ) {
1912 $htmlpairs = array( # Tags that must be closed
1913 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1914 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1915 'strike', 'strong', 'tt', 'var', 'div', 'center',
1916 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1917 'ruby', 'rt' , 'rb' , 'rp', 'p'
1918 );
# Tags that are not tracked on the tag stack below
1919 $htmlsingle = array(
1920 'br', 'hr', 'li', 'dt', 'dd'
1921 );
1922 $htmlnest = array( # Tags that can be nested--??
1923 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1924 'dl', 'font', 'big', 'small', 'sub', 'sup'
1925 );
1926 $tabletags = array( # Can only appear inside table
1927 'td', 'th', 'tr'
1928 );
1929 } else {
1930 $htmlpairs = array();
1931 $htmlsingle = array();
1932 $htmlnest = array();
1933 $tabletags = array();
1934 }
1935
# Table cell/row tags are treated like single tags from here on;
# $htmlelements is the complete list of allowed tags.
1936 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1937 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1938
1939 $htmlattrs = $this->getHTMLattrs () ;
1940
1941 # Remove HTML comments
1942 $text = $this->removeHTMLcomments( $text );
1943
# Everything before the first '<' is kept as it is; each remaining piece
# is the text that followed one '<'.
1944 $bits = explode( '<', $text );
1945 $text = array_shift( $bits );
1946 if(!$wgUseTidy) {
# $tagstack holds the currently open paired tags. $tablestack saves the
# enclosing $tagstack whenever a <table> is opened, so every table gets
# a fresh stack of its own.
1947 $tagstack = array(); $tablestack = array();
1948 foreach ( $bits as $x ) {
# Notices and warnings are switched off around the match and the list()
# assignment; $regs may not hold all six elements if the piece does not
# look like a tag.
1949 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1950 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1951 $x, $regs );
1952 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1953 error_reporting( $prev );
1954
1955 $badtag = 0 ;
1956 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1957 # Check our stack
1958 if ( $slash ) {
1959 # Closing a tag...
# A closing paired tag must match the tag on top of the stack; if it
# does not, the popped tag is pushed back and the piece is rejected.
1960 if ( ! in_array( $t, $htmlsingle ) &&
1961 ( $ot = @array_pop( $tagstack ) ) != $t ) {
1962 @array_push( $tagstack, $ot );
1963 $badtag = 1;
1964 } else {
1965 if ( $t == 'table' ) {
1966 $tagstack = array_pop( $tablestack );
1967 }
1968 $newparams = '';
1969 }
1970 } else {
1971 # Keep track for later
# Rejected: table-only tags outside a table, and a tag that is already
# open but not listed in $htmlnest
1972 if ( in_array( $t, $tabletags ) &&
1973 ! in_array( 'table', $tagstack ) ) {
1974 $badtag = 1;
1975 } else if ( in_array( $t, $tagstack ) &&
1976 ! in_array ( $t , $htmlnest ) ) {
1977 $badtag = 1 ;
1978 } else if ( ! in_array( $t, $htmlsingle ) ) {
1979 if ( $t == 'table' ) {
1980 array_push( $tablestack, $tagstack );
1981 $tagstack = array();
1982 }
1983 array_push( $tagstack, $t );
1984 }
1985 # Strip non-approved attributes from the tag
1986 $newparams = $this->fixTagAttributes($params);
1987
1988 }
1989 if ( ! $badtag ) {
# Accepted tag: re-emit it; a '>' in the text after the tag is escaped
1990 $rest = str_replace( '>', '&gt;', $rest );
1991 $text .= "<$slash$t $newparams$brace$rest";
1992 continue;
1993 }
1994 }
# Not an allowed tag, or rejected above: escape the whole piece
1995 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1996 }
1997 # Close off any remaining tags
1998 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1999 $text .= "</$t>\n";
2000 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2001 }
2002 } else {
2003 # this might be possible using tidy itself
# With tidy enabled, nesting is not checked here: allowed tags only get
# their attributes cleaned, everything else is escaped.
2004 foreach ( $bits as $x ) {
2005 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2006 $x, $regs );
2007 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2008 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2009 $newparams = $this->fixTagAttributes($params);
2010 $rest = str_replace( '>', '&gt;', $rest );
2011 $text .= "<$slash$t $newparams$brace$rest";
2012 } else {
2013 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2014 }
2015 }
2016 }
2017 wfProfileOut( $fname );
2018 return $text;
2019 }
2020
/**
 * Remove '<!--', '-->', and everything between.
 * To avoid leaving blank lines, when a comment is both preceded
 * and followed by a newline (ignoring spaces), trim leading and
 * trailing spaces and one of the newlines.
 *
 * An unterminated comment, and everything after it, is left in place.
 *
 * @param string $text Text to clean
 * @return string Text without (terminated) HTML comments
 * @access private
 */
function removeHTMLcomments( $text ) {
    $fname='Parser::removeHTMLcomments';
    wfProfileIn( $fname );

    for ( ;; ) {
        $open = strpos( $text, '<!--' );
        if ( $open === false ) {
            break;
        }
        $close = strpos( $text, '-->', $open + 4 );
        if ( $close === false ) {
            # Unterminated comment; bail out
            break;
        }
        # Point just past the closing '-->'
        $close += 3;

        # Widen the range over the character before the comment, any spaces
        # in front of that, and any spaces after the comment.
        $from = max( $open - 1, 0 );
        $span = $close - $from;
        while ( $from > 0 && substr( $text, $from, 1 ) === ' ' ) {
            --$from;
            ++$span;
        }
        while ( substr( $text, $from + $span, 1 ) === ' ' ) {
            ++$span;
        }

        $newlineBefore = ( substr( $text, $from, 1 ) === "\n" );
        $newlineAfter = ( substr( $text, $from + $span, 1 ) === "\n" );

        if ( $newlineBefore && $newlineAfter ) {
            # Remove the comment, leading and trailing
            # spaces, and leave only one newline.
            $text = substr_replace( $text, "\n", $from, $span + 1 );
        } else {
            # Remove just the comment.
            $text = substr_replace( $text, '', $open, $close - $open );
        }
    }

    wfProfileOut( $fname );
    return $text;
}
2064
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable by the user
 *    and the edit-section option is enabled
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * A headline containing a marker of the form
 * __MWTEMPLATESECTION=<base64>&<base64>__ is treated as a template section:
 * the marker is removed, the first part is base64-decoded to a title and the
 * second to a section index (plus one), and the edit link is built with
 * editSectionLinkForOther() so that it points at the template. Template
 * sections are not counted in the page's own section numbering.
 *
 * @param string $text   HTML containing <h1>..<h6> headings
 * @param bool   $isMain if true (and __TOC__ is not used) the TOC is inserted
 *                       after the first block of text
 * @return string the text with rebuilt headings (and TOC, if shown)
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel, $wgLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, $matches[2] = rest of the opening tag
	# (attributes and '>'), $matches[3] = headline contents.
	# (The pattern is split so the source never contains a PHP closing tag.)
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$refers = array();   # canonized headline => number of times seen so far
	$refcount = array(); # headline index => occurrence number of its anchor
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		# Detect and strip the template-section marker
		if (preg_match("/__MWTEMPLATESECTION=([^&]+)&([^_]+)__/", $headline, $mat)) {
			$istemplate = 1;
			$templatetitle = base64_decode($mat[1]);
			$templatesection = 1 + (int)base64_decode($mat[2]);
			$headline = preg_replace("/__MWTEMPLATESECTION=([^&]+)&([^_]+)__/", "", $headline);
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted number from the non-empty counters of
			# every level up to the current one
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		# (the second call must operate on the result of the first, otherwise
		# the first expansion is thrown away)
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		$canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
			"\$wgLinkHolders['texts'][\$1]",
			$canonized_headline );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Turn the URL encoding into an anchor name: keep colons readable
		# and use '.' instead of '%' as the escape character
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			if( $istemplate )
				$head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
			else
				$head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
		}

		# Add the edit section span
		# NOTE(review): for template headlines this still uses the page's own
		# section counter, unlike the [edit] link above - confirm whether the
		# right-click handler should be skipped or redirected for templates.
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($sectionCount+1,$headline);
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

		$headlineCount++;
		if( !$istemplate )
			$sectionCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# __TOC__ was left in the text above; put the TOC in its place
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2303
/**
 * Return an HTML link for the "ISBN 123456" text
 *
 * Every occurrence of "ISBN " followed by a run of digits, dashes or
 * uppercase letters is replaced by a link to Special:Booksources with the
 * dashes stripped from the number. Occurrences not followed by such a run
 * are put back unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN links inserted
 * @access private
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# Literal split; the leading space guarantees a non-empty first chunk
	# and is removed again below.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Extra spaces between the keyword and the number
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		if ( $x === '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Leading run of characters that may belong to the number.
		# strspn() copes with the run reaching the end of the string.
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( '' == $num ) {
			# Nothing usable: restore the text exactly, including any
			# dashes that were consumed into $isbn.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2351
/**
 * Return an HTML link for the "GEO ..." text
 *
 * First rewrites coordinates written as
 *   D&deg;M'S" North|South, D&deg;M'S" West|East
 * into the "(GEO <lat>:<lon>)" form, then turns every "GEO " followed by a
 * run of digits, dots, signs and colons that contains at least one colon
 * into a link to Special:Geo. Anything else is put back unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with GEO links inserted
 * @access private
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This rewrite is only for the ~35000 U.S. Census Rambot pages...
	$directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
	# Sign given to each hemisphere. South is negative, mirroring the way
	# West is negative for longitude; emitting '+' for South would make it
	# indistinguishable from North.
	# NOTE(review): confirm Special:Geo accepts a negative latitude here.
	$latSign = array ( 'N' => '+' , 'S' => '-' ) ;
	$lonSign = array ( 'W' => '-' , 'E' => '+' ) ;
	foreach ( $latSign as $lat => $ls ) {
		foreach ( $lonSign as $lon => $os ) {
			$text = preg_replace (
				"/(\d+)&deg;(\d+)'(\d+)\" {$directions[$lat]}, (\d+)&deg;(\d+)'(\d+)\" {$directions[$lon]}/" ,
				"(GEO {$ls}\$1.\$2.\$3:{$os}\$4.\$5.\$6)" ,
				$text ) ;
		}
	}

	# Literal split; the leading space guarantees a non-empty first chunk
	# and is removed again below.
	$a = explode( 'GEO ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Extra spaces between the keyword and the coordinates
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Leading run of coordinate characters.
		# strspn() copes with an empty string and with the run reaching
		# the end of the string.
		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( array( '+', ' ' ), '', $geo );

		if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			# Not a coordinate pair: restore the text exactly, including
			# whatever was consumed into $geo.
			$text .= "GEO $blank$geo$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2402
/**
 * Return an HTML link for the "RFC 1234" text
 *
 * Every "RFC " followed by digits becomes an external link built from the
 * 'rfcurl' message, unless the keyword is directly preceded by "[[", in
 * which case the text is left for normal link processing.
 *
 * @access private
 * @param string $text text to be processed
 * @return string the text with RFC links inserted
 */
function magicRFC( $text ) {
	$valid = '0123456789';
	$internal = false;

	# Literal split; the leading space guarantees a non-empty first chunk
	# and is removed again below.
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);

	/* Check if RFC keyword is preceded by [[.
	 * This test is made here because of the array_shift above,
	 * which prevents the test from being done in the foreach.
	 */
	if(substr($text, -2) == '[[') { $internal = true; }

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if($x=='') {
			$text.='RFC ';
			continue;
		}

		/* remove and save whitespaces in $blank */
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		/* remove and save the rfc number in $rfc;
		 * strspn() copes with the number reaching the end of the string */
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc == '') {
			/* call back stripped spaces */
			$text .= "RFC $blank$x";
		} elseif( $internal) {
			/* normal link */
			$text .= "RFC $rfc$x";
		} else {
			/* build the external link */
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = (substr($x,-2) == '[[');
	}
	return $text;
}
2465
/**
 * Transform wiki markup when saving a page: normalise \r\n line endings to
 * \n, then run the pre-save pass (signatures, {{subst:}} templates, etc.)
 * on the text with stripped sections protected.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Line-ending normalisation
	$text = str_replace( "\r\n", "\n", $text );

	// Regex-based <br> normalisation, currently disabled:
	/*
	$pairs = array(
		"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
		"/<br *?>/i" => "<br />",
	);
	$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
	*/

	# Strip, transform, then put the stripped sections back
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	$text = $this->unstripNoWiki( $text, $stripState );

	return $text;
}
2506
/**
 * Pre-save transform helper function
 *
 * Performs, in order: variable replacement, signature expansion
 * (~~~~~ = timestamp, ~~~~ = user link plus timestamp, ~~~ = user link),
 * expansion of the "pipe trick" context links ([[|name]], [[name (context)|]],
 * [[ns:name|]]), trimming of trailing whitespace and removal of the
 * MAG_END magic word.
 *
 * @param string $text the text to transform
 * @param User &$user the user whose name/nickname is used for signatures
 * @return string the transformed text
 * @access private
 */
function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	# Restore the timezone that was saved above
	if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

	# Plain string replacement: the replacement text contains user-supplied
	# data (the nickname), which must not be interpreted as regex
	# backreferences. Longest tilde run first.
	$userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# If the current title itself looks like "name (context)", that context
	# is applied to [[|page]] links below
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2577
/**
 * Prime the parser members that parse() would normally set (title, options,
 * output type), so that external code can call individual class members
 * with confidence.
 *
 * @param Title &$title title to parse against
 * @param ParserOptions $options parsing options
 * @param int $outputType stored as the parser's output type
 * @param bool $clearState whether to reset the parser state as well
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2591
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * A static flag makes nested calls (a call made while another one is
 * still running) return the text untouched, which prevents infinite
 * recursion.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		# Set up parser state for message mode, then expand variables
		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
2619
/**
 * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
 * Callback will be called with the text within
 * Transform and return the text within
 *
 * @param string $tag tag name to register the callback under
 * @param mixed $callback the callback to store for this tag
 * @return mixed the callback previously registered for $tag, or null
 * @access public
 */
function setHook( $tag, $callback ) {
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	} else {
		$previous = null;
	}
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2631 }
2632
/**
 * Container for the result of a parse: the output text plus the language
 * links, category links and "contains old magic" flag collected with it.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	/**
	 * @param string $text
	 * @param array $languageLinks
	 * @param array $categoryLinks
	 * @param bool $containsOldMagic
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	/**
	 * Fold another ParserOutput's links and old-magic flag into this one.
	 * The text and cache time are left untouched.
	 *
	 * @param ParserOutput $other
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from the other object's category links
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2670
/**
 * Set options of the Parser
 *
 * Plain value holder: each option has a getter and a setter, and
 * initialiseFromUser() fills all of them from site globals and the
 * given user's preferences.
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Getters
	function getUseTeX() {
		return $this->mUseTeX;
	}
	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function getSkin() {
		return $this->mSkin;
	}
	function getDateFormat() {
		return $this->mDateFormat;
	}
	function getEditSection() {
		return $this->mEditSection;
	}
	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function getShowToc() {
		return $this->mShowToc;
	}

	# Setters; each returns whatever wfSetVar() returns
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference, not copied
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	/**
	 * Build a ParserOptions object initialised from the given user.
	 * @param User &$user
	 * @return ParserOptions
	 */
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	/**
	 * Fill every option from the site globals and the user's preferences.
	 * A false/empty $userInput is replaced by a fresh User object that is
	 * marked as loaded.
	 * @param User &$userInput
	 */
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$fname = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $fname );

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Skin lookup is profiled separately
		wfProfileIn( $fname.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $fname.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );

		wfProfileOut( $fname );
	}

}
2750
/**
 * Regex callback, used in Parser::replaceVariables.
 * Forwards the match array to braceSubstitution() on the parser stored in
 * the global $wgCurParser.
 */
function wfBraceSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2756
/**
 * Regex callback: forwards the match array to argSubstitution() on the
 * parser stored in the global $wgCurParser.
 */
function wfArgSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2761
/**
 * Return the total number of articles.
 * Calls wfLoadSiteStats() first, then returns the value of the global
 * $wgNumberOfArticles.
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();
	return $GLOBALS['wgNumberOfArticles'];
}
2771
/**
 * Get various statistics from the database.
 *
 * Does nothing unless $wgNumberOfArticles is -1. Otherwise reads row 1 of
 * site_stats and, if a row is returned, copies the totals into
 * $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles.
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	if ( $wgNumberOfArticles != -1 ) {
		return;
	}

	$dbr =& wfGetDB( DB_SLAVE );
	$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conds = array( 'ss_row_id' => 1 );
	$row = $dbr->getArray( 'site_stats', $fields, $conds, 'wfLoadSiteStats' );

	if ( $row !== false ) {
		$wgTotalViews = $row->ss_total_views;
		$wgTotalEdits = $row->ss_total_edits;
		$wgNumberOfArticles = $row->ss_good_articles;
	}
}
2795
/**
 * Escape only the characters ", > and < as HTML entities; everything
 * else (including &) is returned unchanged.
 */
function wfEscapeHTMLTagsOnly( $in ) {
	$entities = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);
	return strtr( $in, $entities );
}
2802
/**
 * Return the base64 encoding of the current value of the global
 * $wgCurrentSectionNumber (as a string), then increment that global.
 */
function wfGetSectionNumber() {
	global $wgCurrentSectionNumber;

	$encoded = base64_encode( (string)$wgCurrentSectionNumber );
	$wgCurrentSectionNumber++;

	return $encoded;
}
2809 ?>