Oops, I committed the wrong file - sorry. :)
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 /**
4 * File for Parser and related classes
5 *
6 * @package MediaWiki
7 * @version $Id$
8 */
9
10 /**
11 * Variable substitution O(N^2) attack
12 *
13 * Without countermeasures, it would be possible to attack the parser by saving
14 * a page filled with a large number of inclusions of large pages. The size of
15 * the generated page would be proportional to the square of the input size.
16 * Hence, we limit the number of inclusions of any given page, thus bringing any
17 * attack back to O(N).
18 */
# Inclusion limits used against the quadratic template-expansion attack.
# NOTE(review): the code enforcing these limits is not visible in this part
# of the file; the unit of MAX_INCLUDE_SIZE should be confirmed there.
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# Pseudo tag name for Parser::extractTags(): when passed as $tag, HTML
# comments are extracted instead of an <XML>-style tag pair. The value must
# never collide with anything usable in wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the unique markers that temporarily replace stripped content
define( 'UNIQ_PREFIX', 'NaodW29');

# Building blocks for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# URL characters: anything except ']', '<', '>', space, control characters and DEL
define( 'EXT_LINK_URL_CLASS', '[^]<>\\x00-\\x20\\x7F]' );
# Link text characters: anything except ']', control characters and DEL (space allowed)
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# [protocol:url optional text]; group 1 = URL, group 2 = protocol, group 3 = text
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# The protocol alternation is wrapped in (?:...) so that the colon is required
# after every protocol. Without the inner group the pattern read
# "(http|https:)" and accepted strings such as "httpfoo/bar.png".
define( 'EXT_IMAGE_REGEX',
	'/^((?:'.HTTP_PROTOCOLS.'):)'.  # Protocol, including the colon
	'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
51
52 /**
53 * PHP Parser
54 *
55 * Processes wiki markup
56 *
57 * <pre>
58 * There are three main entry points into the Parser class:
59 * parse()
60 * produces HTML output
61 * preSaveTransform()
62 * produces altered wiki markup.
63 * transformMsg()
64 * performs brace substitution on MediaWiki messages
65 *
66 * Globals used:
67 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
68 *
69 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
70 *
71 * settings:
72 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
73 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
74 * $wgLocaltimezone
75 *
76 * * only within ParserOptions
77 * </pre>
78 *
79 * @package MediaWiki
80 */
81 class Parser
82 {
83 /**#@+
84 * @access private
85 */
86 # Persistent:
87 var $mTagHooks;
88
89 # Cleared with clearState():
90 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
91 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
92
93 # Temporary:
94 var $mOptions, $mTitle, $mOutputType,
95 $mTemplates, // cache of already loaded templates, avoids
96 // multiple SQL queries for the same string
97 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
98 // in this path. Used for loop detection.
99
100 /**#@-*/
101
102 /**
103 * Constructor
104 *
105 * @access public
106 */
function Parser() {
	# PHP 4 style constructor: start with no tag hooks and empty template
	# caches, then initialise all per-parse state through clearState().
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	$this->clearState();
}
113
114 /**
115 * Clear Parser state
116 *
117 * @access private
118 */
function clearState() {
	# Reset every member that holds per-parse state to its initial value.
	# mTagHooks, mTemplates and mTemplatePath are deliberately left alone.

	# Fresh output container
	$this->mOutput = new ParserOutput();

	# Collections
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Scalars and flags
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;
}
130
131 /**
132 * First pass--just handle <nowiki> sections, pass the rest off
133 * to internalParse() which does all the real work.
134 *
135 * @access public
136 * @return ParserOutput a ParserOutput
137 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	# Convert wikitext to HTML and return the populated ParserOutput.
	#
	#   $text        wikitext to render
	#   $title       stored by reference in $this->mTitle
	#   $options     stored in $this->mOptions
	#   $linestart   forwarded to internalParse() and doBlockLevels()
	#   $clearState  when true, clearState() is called before parsing
	#
	# Pipeline: strip() -> internalParse() -> unstrip() -> tag/entity clean-up
	# -> doBlockLevels() -> unstripNoWiki().
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			# (the lookahead captures nothing, so only \1 is referenced)
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i" => "\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Escape ampersands that do not start an entity. The hex class is
			# [0-9A-Fa-f]; the former "A-f" range also accepted G-Z and several
			# punctuation characters, so e.g. "&#xZZ;" was left unescaped.
			# Named entities are still accepted without checking the name.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki>/<html> content goes back in last so nothing above touches it
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
190
191 /**
192 * Get a random string
193 *
194 * @access private
195 * @static
196 */
function getRandomString() {
	# Two independent 31-bit random numbers rendered as lowercase hex and
	# glued together. Used to make strip markers hard to guess; mt_rand() is
	# not a cryptographic source.
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
200
201 /**
202 * Replaces all occurrences of <$tag>content</$tag> in the text
203 * with a random marker and returns the new text. the output parameter
204 * $content will be an associative array filled with data on the form
205 * $unique_marker => content.
206 *
207 * If $content is already set, the additional entries will be appended
208 * If $tag is set to STRIP_COMMENTS, the function will extract
209 * <!-- HTML comments -->
210 *
211 * @access private
212 * @static
213 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	# Replace every <$tag>...</$tag> section of $text with a unique marker.
	#
	#   $tag          tag name, or STRIP_COMMENTS to extract <!-- ... -->
	#   $text         input text
	#   $content      in/out map of marker => extracted inner text; created
	#                 when empty, appended to otherwise
	#   $uniq_prefix  string every generated marker starts with
	#
	# Returns $text with each section replaced by its marker.
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters are the same for every iteration, so build them once.
	# The tag name is quoted so that a name containing regex metacharacters
	# (or the "/" delimiter) cannot break or alter the pattern.
	if ( $tag == STRIP_COMMENTS ) {
		$startRegex = '/<!--/i';
		$endRegex = '/-->/i';
	} else {
		$quotedTag = preg_quote( $tag, '/' );
		$startRegex = "/<\\s*$quotedTag\\s*>/i";
		$endRegex = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $startRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No opening tag left, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $endRegex, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# An unclosed tag takes everything up to the end of the text;
			# there is then no remainder to continue with.
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
245
246 /**
247 * Strips and renders nowiki, pre, math, hiero
248 * If $render is set, performs necessary rendering operations on plugins
249 * Returns the text, and fills an array with data needed in unstrip()
250 * If the $state is already a valid strip state, it adds to the state
251 *
252 * @param bool $stripcomments when set, HTML comments <!-- like this -->
253 * will be stripped in addition to other tags. This is important
254 * for section editing, where these comments cause confusion when
255 * counting the sections in the wikisource
256 *
257 * @access private
258 */
function strip( $text, &$state, $stripcomments = false ) {
	# Take <html>, <nowiki>, <math>, <pre>, optionally HTML comments, and all
	# registered extension tags out of $text, leaving a unique marker for each.
	#
	#   $text           text to process
	#   $state          in/out strip state: category => ( marker => content ).
	#                   An existing state is extended, an empty one is created.
	#   $stripcomments  also extract <!-- ... --> comments
	#
	# When the output type is OT_HTML the stored content is the rendered form;
	# otherwise it is the original markup including its tags.
	# Returns the text with markers in place of the extracted sections.
	$render = ( $this->mOutputType == OT_HTML );
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	$uniq_prefix = UNIQ_PREFIX;

	# html: only honoured when raw HTML is enabled on an edit-restricted wiki
	global $wgRawHtml, $wgWhitelistEdit;
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		if ( !$render ) {
			foreach( $html_content as $marker => $content ) {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
		# When rendering, the content is kept raw and unchecked for validity.
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the markup literally. The closing tag
				# needs its slash (it used to be emitted as a second "<math>").
				$math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Built-in categories first; "+" keeps the left-hand entry on a key clash.
	$stripped = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $category => $items ) {
			if ( isset( $state[$category] ) && is_array( $state[$category] ) ) {
				$state[$category] = $state[$category] + $items;
			} else {
				# A category missing from the old state must be added rather
				# than dropped, or its markers would never be expanded again.
				$state[$category] = $items;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
365
366 /**
367 * restores pre, math, and hiero removed by strip()
368 *
369 * always call unstripNoWiki() after this one
370 * @access private
371 */
function unstrip( $text, &$state ) {
	# Put back everything taken out by strip(), except the 'nowiki' and 'html'
	# categories, which unstripNoWiki() handles.
	#
	#   $text   text containing strip markers
	#   $state  strip state: category => ( marker => content ); not modified
	#
	# Returns the text with the markers replaced by their content.
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating over reversed copies instead of end()/prev() with a
	# "!== false" sentinel means a stored value of false no longer ends the
	# loop early and leaves the remaining markers in the text.
	foreach ( array_reverse( $state, true ) as $category => $items ) {
		if ( $category == 'nowiki' || $category == 'html' || !is_array( $items ) ) {
			continue;
		}
		foreach ( array_reverse( $items, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
385
386 /**
387 * always call this after unstrip() to preserve the order
388 *
389 * @access private
390 */
function unstripNoWiki( $text, &$state ) {
	# Put back the 'nowiki' content and, when $wgRawHtml is enabled, the
	# 'html' content that strip() removed.
	#
	#   $text   text containing strip markers
	#   $state  strip state: category => ( marker => content ); not modified
	#
	# Returns the text with those markers replaced by their content.
	global $wgRawHtml;

	$categories = array( 'nowiki' );
	if ( $wgRawHtml ) {
		$categories[] = 'html';
	}

	foreach ( $categories as $category ) {
		if ( !isset( $state[$category] ) || !is_array( $state[$category] ) ) {
			continue;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted.
		# A foreach over the reversed copy cannot be cut short by a stored
		# value of false, unlike the former end()/prev() sentinel loop.
		foreach ( array_reverse( $state[$category], true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
406
407 /**
408 * Add an item to the strip state
409 * Returns the unique tag which must be inserted into the stripped text
410 * The tag will be replaced with the original text in unstrip()
411 *
412 * @access private
413 */
function insertStripItem( $text, &$state ) {
	# Store $text in the strip state under a new unique marker.
	#
	#   $text   content to store
	#   $state  in/out strip state; created here when empty
	#
	# Returns the marker that must be inserted into the stripped text; it is
	# expanded back to $text by unstrip().
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same built-in categories, in the same order, as a state created by
		# strip(). 'comment' is included because strip() merges into that key
		# whenever it is handed an existing state.
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
427
428 /**
429 * Return allowed HTML attributes
430 *
431 * @access private
432 */
function getHTMLattrs () {
	# Whitelist of attribute names allowed on user-supplied HTML tags; no
	# scripting attributes. The list is assembled from thematic groups and
	# keeps the original order, including 'width' appearing twice.
	$general = array( 'title', 'align', 'lang', 'dir', 'width', 'height' );
	$rules = array( 'bgcolor', 'clear', /* BR */ 'noshade' /* HR */ );
	$quoting = array( 'cite' /* BLOCKQUOTE, Q */ );
	$font = array( 'size', 'face', 'color' /* FONT */ );
	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $general, $rules, $quoting, $font, $lists, $tables, $css );
}
448
449 /**
450 * Remove non approved attributes and javascript in css
451 *
452 * @access private
453 */
function fixTagAttributes ( $t ) {
	# Reduce an attribute string to whitelisted attributes and drop it
	# entirely when its style attribute looks dangerous.
	#
	#   $t  raw attribute text of a tag, e.g. 'align="left" onclick="x"'
	#
	# Returns the cleaned, trimmed attribute string ('' when nothing is left).
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used to be preg_replace() with the /e modifier, which evaluated the
	# user-supplied attribute value inside a double-quoted PHP string: a value
	# such as {${...}} was executed as code, and apostrophes came out with a
	# backslash in front. A callback receives the matches as plain data.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s">]+|"[^">]*"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t);

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

function fixTagAttributesCallback( $matches ) {
	# preg_replace_callback() helper for fixTagAttributes(): keep one
	# attribute if its name is whitelisted, otherwise remove it.
	#
	#   $matches[1]  attribute name
	#   $matches[3]  attribute value including any quotes; absent for a
	#                bare attribute
	$name = $matches[1];
	if ( !in_array( strtolower( $name ), $this->getHTMLattrs(), true ) ) {
		return '';
	}
	$value = isset( $matches[3] ) ? $matches[3] : '';
	if ( $value === '' ) {
		return $name;
	}
	return $name . '=' . $value;
}
478
479 /**
480 * interface with html tidy, used if $wgUseTidy = true
481 *
482 * @access public
483 * @static
484 */
function tidy ( $text ) {
	# Run $text through the external tidy program and return its output.
	#
	#   $text  HTML fragment; it is wrapped in a minimal XHTML document first
	#
	# Returns whatever tidy wrote to stdout. If that is empty although $text
	# was not, $text is returned with an HTML comment reporting the failure.
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	# Encoding switch for tidy: a native mode only when input and output
	# encodings agree and are one of the two known ones, otherwise raw.
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	$outputEncoding = strtoupper( $wgOutputEncoding );
	if ( $outputEncoding == 'ISO-8859-1' && $sameEncoding ) {
		$opts = ' -latin1';
	} elseif ( $outputEncoding == 'UTF-8' && $sameEncoding ) {
		$opts = ' -utf8';
	} else {
		$opts = ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	# stdin and stdout are pipes; stderr is appended to /dev/null
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$command = $wgTidyBin . ' -config ' . $wgTidyConf . ' ' . $wgTidyOpts . $opts;

	$cleansource = '';
	$process = proc_open( $command, $descriptorspec, $pipes );
	if ( is_resource( $process ) ) {
		# Send the whole document, close stdin, then read stdout to the end
		fwrite( $pipes[0], $wrappedtext );
		fclose( $pipes[0] );
		while ( !feof( $pipes[1] ) ) {
			$cleansource .= fgets( $pipes[1], 1024 );
		}
		fclose( $pipes[1] );
		proc_close( $process );
	}

	wfProfileOut( $fname );

	if ( $cleansource == '' && $text != '' ) {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}
	return $cleansource;
}
532
533 /**
534 * parse the wiki syntax used to render tables
535 *
536 * @access private
537 */
function doTableStuff ( $t ) {
	# Translate wiki table markup ({| ... |}) into HTML tables.
	#
	#   $t  text to process
	#
	# Returns the text with table markup replaced by HTML. The text is handled
	# line by line; lines outside any table are left untouched. Four parallel
	# stacks, one entry per currently open table, track the open cell and row.
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Table start, optionally indented with leading colons
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			# Table end: close the open cell and row, then the table
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			# Row separator; what follows the dashes are the row attributes
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc ) {
			# Cell line: "|" data cells, "!" header cells, "|+" caption
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;

			# Loop through each table cell
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need a row; open one if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, then remember the new cell type
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# Cell parameters
				$y = explode ( '|' , $theline , 2 ) ;
				# Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $y[0], '[[' ) !== false ) {
					$y = array ($theline);
				}
				if ( count ( $y ) == 1 )
					$y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that opened it; this used to emit
		# '</td>' even when the open cell was a <th> or <caption>.
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	wfProfileOut( $fname );
	return $t ;
}
645
646 /**
647 * Helper function for parse() that transforms wiki markup into
648 * HTML. Only called for $mOutputType == OT_HTML.
649 *
650 * @access private
651 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	# Turn (already stripped) wiki markup into HTML.
	#
	#   $text       wikitext
	#   $linestart  accepted for interface compatibility; not used in this body
	#   $args       passed to replaceVariables()
	#   $isMain     passed to formatHeadings()
	#
	# Returns the transformed text. The order of the steps matters.
	global $wgContLang;

	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# Sanitise HTML, expand variables, apply language conversion
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );
	$text = $wgContLang->convert( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );

	$options =& $this->mOptions;
	if ( $options->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $options->getDateFormat(), $text );
	}

	$text = $this->doAllQuotes( $text );

	# Internal links are replaced twice: the second pass handles links and
	# images inside captions of images
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );

	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );
	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );

	# Finally let the skin post-process the content
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	wfProfileOut( $fname );
	return $text;
}
684
685 /**
686 * Replace special strings like "ISBN xxx" and "RFC xxx" with
687 * magic external links.
688 *
689 * @access private
690 */
function &doMagicLinks( &$text ) {
	# Apply the magic link replacements in order: ISBN, then GEO (only when
	# $wgUseGeoMode is set and true), then RFC.
	# $text is taken by reference, modified in place and also returned.
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
700
701 /**
702 * Parse ^^ tokens and return html
703 *
704 * @access private
705 */
function doExponent ( $text ) {
	# Turn ^^text^^ into a small superscript.
	#
	#   $text  text to process
	#
	# Returns the text with every ^^...^^ pair replaced by
	# <small><sup>...</sup></small>.
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	# The match is non-greedy so that several exponents on one line are
	# converted separately. A greedy ".*" ran from the first "^^" to the last
	# one on the line and swallowed everything in between.
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
713
714 /**
715 * Parse headers and return html
716 *
717 * @access private
718 */
function doHeadings( $text ) {
	# Convert "== Heading ==" style lines into <h1>..<h6> elements.
	# Levels are processed from 6 down to 1 so that longer runs of "=" are
	# consumed before shorter ones can match them.
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = '/^' . $marks . '(.+)' . $marks . '(\\s|$)/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>\\2';
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}

	wfProfileOut( $fname );
	return $text;
}
730
731 /**
732 * Replace single quotes with HTML markup
733 * @access private
734 * @return string the altered text
735 */
function doAllQuotes( $text ) {
	# Apply doQuotes() to every line of $text and join the results again.
	# Bold/italic markup is resolved per line, never across line breaks.
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( $line );
	}
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
748
749 /**
750 * Helper function for doAllQuotes()
751 * @access private
752 */
function doQuotes( $text ) {
	# Convert the apostrophe markup of one line into <i>/<b> tags.
	#
	#   $text  a single line of wikitext
	#
	# Returns the line with '' (italic), ''' (bold) and ''''' (both) replaced
	# by HTML; tags still open at the end of the line are closed.
	#
	# After the split, even indexes of $arr hold plain text and odd indexes
	# hold runs of two or more apostrophes.
	$arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
	$total = count ($arr);
	if ($total == 1) {
		return $text;
	}

	# Pass 1: normalise run lengths to 2, 3 or 5 and count the mark-up.
	# This may shift some apostrophes from being mark-up to being text.
	$numbold = 0;
	$numitalics = 0;
	for ($i = 1; $i < $total; $i += 2) {
		$len = strlen ($arr[$i]);
		if ($len == 4) {
			# Four apostrophes: the first is text, the other three are bold.
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
			$len = 3;
		} else if ($len > 5) {
			# More than five: all but the last five are text.
			$arr[$i-1] .= str_repeat ("'", $len - 5);
			$arr[$i] = "'''''";
			$len = 5;
		}
		switch ($len) {
			case 2:
				$numitalics++;
				break;
			case 3:
				$numbold++;
				break;
			case 5:
				$numitalics++;
				$numbold++;
				break;
		}
	}

	# Pass 2: with an odd number of both bold and italics, one of the bold
	# runs was probably meant as an apostrophe followed by italics. Which one
	# cannot be known for certain; prefer the first one preceded by a
	# single-letter word, then by a multi-letter word, then by a space.
	if (($numbold % 2 == 1) && ($numitalics % 2 == 1)) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ($i = 1; $i < $total; $i += 2) {
			if (strlen ($arr[$i]) != 3) {
				continue;
			}
			$before = $arr[$i-1];
			$x1 = substr ($before, -1);
			$x2 = substr ($before, -2, 1);
			if ($x1 == ' ') {
				if ($firstspace == -1) $firstspace = $i;
			} else if ($x2 == ' ') {
				if ($firstsingleletterword == -1) $firstsingleletterword = $i;
			} else {
				if ($firstmultiletterword == -1) $firstmultiletterword = $i;
			}
		}

		# All three can still be -1, for example when the line holds a
		# single run of five apostrophes; nothing is changed in that case.
		if ($firstsingleletterword > -1) {
			$demote = $firstsingleletterword;
		} else if ($firstmultiletterword > -1) {
			$demote = $firstmultiletterword;
		} else {
			$demote = $firstspace;
		}
		if ($demote > -1) {
			$arr [ $demote ] = "''";
			$arr [ $demote-1 ] .= "'";
		}
	}

	# Pass 3: emit HTML. $state names the open tags in opening order
	# ('', 'i', 'b', 'ib', 'bi'), or 'both' after an opening run of five whose
	# nesting is not known yet; text is held in $buffer while in that state.
	$output = '';
	$buffer = '';
	$state = '';
	for ($i = 0; $i < $total; $i++) {
		$r = $arr[$i];

		if (($i % 2) == 0) {
			# Plain text
			if ($state == 'both') {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		switch (strlen ($r)) {
			case 2:
				if ($state == 'i') {
					$output .= '</i>';
					$state = '';
				} else if ($state == 'bi') {
					$output .= '</i>';
					$state = 'b';
				} else if ($state == 'ib') {
					$output .= '</b></i><b>';
					$state = 'b';
				} else if ($state == 'both') {
					$output .= '<b><i>'.$buffer.'</i>';
					$state = 'b';
				} else {
					# $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
				break;

			case 3:
				if ($state == 'b') {
					$output .= '</b>';
					$state = '';
				} else if ($state == 'bi') {
					$output .= '</i></b><i>';
					$state = 'i';
				} else if ($state == 'ib') {
					$output .= '</b>';
					$state = 'i';
				} else if ($state == 'both') {
					$output .= '<i><b>'.$buffer.'</b>';
					$state = 'i';
				} else {
					# $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
				break;

			case 5:
				if ($state == 'b') {
					$output .= '</b><i>';
					$state = 'i';
				} else if ($state == 'i') {
					$output .= '</i><b>';
					$state = 'b';
				} else if ($state == 'bi') {
					$output .= '</i></b>';
					$state = '';
				} else if ($state == 'ib') {
					$output .= '</b></i>';
					$state = '';
				} else if ($state == 'both') {
					$output .= '<i><b>'.$buffer.'</b></i>';
					$state = '';
				} else {
					# ($state == '')
					$buffer = '';
					$state = 'both';
				}
				break;
		}
	}

	# Close whatever is still open, innermost tag first.
	switch ($state) {
		case 'b':
			$output .= '</b>';
			break;
		case 'i':
			$output .= '</i>';
			break;
		case 'ib':
			$output .= '</b></i>';
			break;
		case 'bi':
			$output .= '</i></b>';
			break;
		case 'both':
			$output .= '<b><i>'.$buffer.'</i></b>';
			break;
	}
	return $output;
}
913
914 /**
915 * Replace external links
916 *
917 * Note: we have to do external links before the internal ones,
918 * and otherwise take great care in the order of things here, so
919 * that we don't end up interpreting some URLs twice.
920 *
921 * @access private
922 */
function replaceExternalLinks( $text ) {
	# Replace bracketed external links ([url] and [url text]) with HTML links
	# and hand the text between them to replaceFreeExternalLinks().
	#
	#   $text  text to process
	#
	# Returns the text with external links converted.
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsgForContent('linktrail');
	# After the leading plain text, every match contributes four pieces:
	# URL, protocol, link text, and the text up to the next match
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		$encUrl = htmlspecialchars( $url );
		# Bit in parentheses showing the URL for the printable version.
		# Omitted when the link text already is the URL (with or without the
		# protocol). The pattern is delimited by "!", so that is the delimiter
		# preg_quote() has to escape.
		if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '!' ) . "/?$!", $url ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				# $encUrl is already HTML-escaped; escaping it a second time
				# would display "&amp;" where the URL contains "&"
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		# The unescaped URL is handed to the skin.
		# This means that users can paste URLs directly into the text
		# Funny characters like &ouml; aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= $sk->makeExternalLink( $url, $text, false ) . $dtrail. $paren . $trail;
	}

	wfProfileOut( $fname );
	return $s;
}
994
995 /**
996 * Replace anything that looks like a URL with a link
997 * @access private
998 */
function replaceFreeExternalLinks( $text ) {
	# Turn bare URLs ("free links") in $text into links or external images.
	#
	#   $text  text without bracketed external links
	#
	# Returns the text with every recognised URL replaced.
	#
	# Splitting on "protocol:" yields the leading text followed by pairs of
	# (protocol with colon, text following it).
	$bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );

	$sk =& $this->mOptions->getSkin();

	for ( $i = 0; $i < count( $bits ); $i += 2 ) {
		$protocol = $bits[$i];
		$remainder = $bits[$i + 1];

		if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Nothing URL-like follows the protocol; keep the text as it is
			$s .= $protocol . $remainder;
			continue;
		}

		# Found some characters after the protocol that look promising
		$url = $protocol . $m[1];
		$trail = $m[2];

		# Trailing punctuation belongs to the sentence, not to the URL.
		# NOTE(review): the mask is a plain character list, not a regex, so
		# the backslash in it is itself treated as trailing punctuation.
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}

		$numSepChars = strspn( strrev( $url ), $sep );
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		# Replace &amp; from obsolete syntax with &
		$url = str_replace( '&amp;', '&', $url );

		# An external image if allowed and recognised, otherwise a plain link
		$link = $this->maybeMakeImageLink( $url );
		if ( $link === false ) {
			$link = $sk->makeExternalLink( $url, $url );
		}
		$s .= $link . $trail;
	}
	return $s;
}
1044
/**
 * Build an inline image for a URL, provided external images are enabled
 * in the parser options and the URL matches EXT_IMAGE_REGEX.
 *
 * @param string $url the candidate image URL
 * @return mixed the image HTML from the skin, or false if no image was made
 * @access private
 */
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}

	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
1060
/**
 * Process [[ ]] wikilinks
 *
 * The text is split on '[[' and each piece is matched against the form
 * "target|text]]trail". Pieces that do not match, whose target contains a
 * URL protocol, or whose target is not a valid title are put back into the
 * output verbatim. Unless the target is escaped with a leading ':',
 * language links, images and categories get special handling; self-links,
 * Media: and Special: links are handled separately; everything else is
 * rendered through the skin's makeLinkObj().
 *
 * @param string $s the text to process
 * @return string the text with wikilinks replaced
 * @access private
 */
function replaceInternalLinks( $s ) {
	global $wgContLang, $wgLinkCache;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	# $s keeps the text before the first '[[', $a holds one entry per '[['
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	# Both are initialised here so neither is ever read while undefined
	$prefix = '';
	$first_prefix = false;
	if ( $useLinkPrefixExtension ) {
		# The word directly before the first '[[' is the first link's prefix
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		}
	}

	wfProfileOut( $fname.'-setup' );

	# start procedeeding each line
	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			# Pull a trailing word off the output so far and use it as prefix
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if (preg_match('/((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		# Make subpage if necessary
		$link = $this->maybeDoSubpageLink( $m[1], $text );

		# A leading ':' switches off the special treatment below
		$noforce = (substr($m[1], 0, 1) != ':');
		if (!$noforce) {
			# Strip off leading ':'
			$link = substr($link, 1);
		}

		$wasblank = ( '' == $text );
		if( $wasblank ) $text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		//check other language variants of the link
		//if the article does not exist
		$variants = $wgContLang->getVariants();
		if(sizeof($variants) > 1) {
			$varnt = false;
			if($nt->getArticleID() == 0) {
				foreach ( $variants as $v ) {
					if($v == $wgContLang->getPreferredVariant())
						continue;
					$varlink = $wgContLang->autoConvert($link, $v);
					$varnt = Title::newFromText($varlink);
					if($varnt && $varnt->getArticleID()>0) {
						break;
					}
				}
			}
			# Only switch to a variant whose article actually exists
			if($varnt && $varnt->getArticleID()>0) {
				$nt = $varnt;
				$link = $varlink;
			}
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();

		# Link not escaped by : , create the various objects
		if( $noforce ) {

			# Interwikis
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
				# Language links are collected in the output object, not rendered inline
				array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}

			if ( $ns == NS_IMAGE ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$pPLC=$sk->postParseLinkColour();
				$sk->postParseLinkColour( false );
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$sk->postParseLinkColour( $pPLC );
				$wgLinkCache->resume();

				# Without explicit link text the sort key is the current page's title
				if ( $wasblank ) {
					if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
						$sortkey = $this->mTitle->getText();
					} else {
						$sortkey = $this->mTitle->getPrefixedText();
					}
				} else {
					$sortkey = $text;
				}
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				# The rendered category link goes to the output object, not into the text
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}

		if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
		    ( strpos( $link, '#' ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == NS_MEDIA ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == NS_SPECIAL ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1252
/**
 * Handle link to subpage if necessary
 *
 * Valid link forms:
 *   Foobar   -- normal
 *   :Foobar  -- override special treatment of prefix (images, language links)
 *   /Foobar  -- convert to CurrentPage/Foobar
 *   /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 *
 * The conversion only happens when the current title's namespace is
 * enabled in $wgNamespacesWithSubpages; otherwise the target is returned
 * as a standard link (minus the surrounding slashes in the /Foobar/ form).
 *
 * @param $target string the source of the link
 * @param &$text the link text; if it is the empty string and a subpage
 *               link is made, it is set to the (possibly stripped) target
 * @return string the full name of the link
 * @access private
 */
function maybeDoSubpageLink($target, &$text) {
	global $wgNamespacesWithSubpages;

	$fname = 'Parser::maybeDoSubpageLink';
	wfProfileIn( $fname );
	# Look at the first character. substr() is used instead of a string
	# offset so that an empty target does not raise a notice.
	if( substr( $target, 0, 1 ) === '/' ) {
		# / at end means we don't want the slash to be shown
		if(substr($target,-1,1)=='/') {
			$target=substr($target,1,-1);
			$noslash=$target;
		} else {
			$noslash=substr($target,1);
		}

		# Some namespaces don't allow subpages
		if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
			# subpages allowed here
			$ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
			if( '' === $text ) {
				$text = $target;
			} # this might be changed for ugliness reasons
		} else {
			# no subpage allowed, use standard link
			$ret = $target;
		}
	} else {
		# no subpage
		$ret = $target;
	}

	wfProfileOut( $fname );
	return $ret;
}
1299
1300 /**#@+
1301 * Used by doBlockLevels()
1302 * @access private
1303 */
# closeParagraph() returns the closing tag (plus newline) for the section
# recorded in mLastSection, or '' if none is open, and resets both
# mLastSection and mInPre.
#
/* private */ function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mLastSection = '';
	$this->mInPre = false;
	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters they share.
#
/* private */ function getCommon( $st1, $st2 ) {
	# No point looking past the end of the shorter string
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() first closes any open paragraph, then opens the list and its
# first item. For an unknown character the error marker replaces the
# whole result.
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			# Remember that a <dt> is open so it gets the right closing tag
			$this->mDTopen = true;
			break;
		default:
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
1343
# nextItem() closes the current list item and opens the next one.
# For definition lists the closing tag depends on whether a <dt> is
# currently open (mDTopen), which is updated to match the new item.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	$close = $this->mDTopen ? '</dt>' : '</dd>';
	$startsTerm = ( ';' == $char );
	$this->mDTopen = $startsTerm;
	return $close . ( $startsTerm ? '<dt>' : '<dd>' );
}
1359
# closeList() closes the last item and the list itself, followed by a
# newline. Only '*', '#' and ':' are recognised; anything else yields an
# error marker without the newline.
/* private */ function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$text = '</li></ul>';
			break;
		case '#':
			$text = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				# The list ends while a term is still open
				$this->mDTopen = false;
				$text = '</dt></dl>';
			} else {
				$text = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $text."\n";
}
1374 /**#@-*/
1375
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * Walks the text line by line, opening and closing lists according to
 * each line's prefix characters and wrapping unprefixed lines in <p> or
 * (for lines starting with a space) <pre>.
 *
 * @param string $text the text to process
 * @param bool $linestart if false, the first line is copied to the output
 *                        without any block-level processing
 * @access private
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Pending paragraph markup that is only emitted once the next line is
	# seen; false when nothing is pending
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ';' mapped to ':' so that term and
			# definition lines count as the same list
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				if ($this->findColonNoLinks($t, $term, $t2) !== false) {
					$t = $t2;
					$output .= $term . $this->nextItem( ':' );
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that this line does not share
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open the levels this line adds
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
						$t = $t2;
						$output .= $term . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				# closeParagraph() cleared mInPre; restore it if a <pre> is still open
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than a string offset: $t is empty on blank
				# lines and an offset read would raise a notice there
				if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is pending, the (blank) line itself is not output
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1536
/**
 * Split up a string on the first ':' that is not preceded by an
 * unclosed <a or <span tag.
 *
 * $before and $after are overwritten for every ':' examined, so when no
 * acceptable ':' exists they hold the split at the last ':' looked at
 * (or are left untouched if the string contains no ':' at all).
 *
 * @param $str string the string to split
 * @param &$before string set to everything before the ':'
 * @param &$after string set to everything after the ':'
 * @return mixed the position of the ':', or false if none found
 */
function findColonNoLinks($str, &$before, &$after) {
	# I wonder if we should make this count all tags, not just <a>
	# and <span>. That would prevent us from matching a ':' that
	# comes in the middle of italics other such formatting....
	# -- Wil
	$fname = 'Parser::findColonNoLinks';
	wfProfileIn( $fname );

	$offset = 0;
	while ( ( $colon = strpos( $str, ':', $offset ) ) !== false ) {
		$before = substr( $str, 0, $colon );
		$after = substr( $str, $colon + 1 );

		# Compare opening and closing tag counts in front of the ':'
		$openAnchors = substr_count( $before, '<a' );
		$openSpans = substr_count( $before, '<span' );
		$closedAnchors = substr_count( $before, '</a>' );
		$closedSpans = substr_count( $before, '</span>' );

		if ( $openAnchors <= $closedAnchors && $openSpans <= $closedSpans ) {
			# Tags are balanced before ':'; ok
			break;
		}

		# Inside a link or span: carry on after this ':'
		$offset = $colon + 1;
	}

	wfProfileOut( $fname );
	return $colon;
}
1576
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * @param $index the MAG_* identifier of the variable
 * @return mixed the variable's current value, or NULL for an identifier
 *               that is not handled here
 * @access private
 */
function getVariableValue( $index ) {
	global $wgContLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgContLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgContLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgContLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgContLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_PAGENAMEE:
			$value = $this->mTitle->getPartialURL();
			break;
		case MAG_NAMESPACE:
			# return Namespace::getCanonicalName($this->mTitle->getNamespace());
			$value = $wgContLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') counts from 0; the result is shifted by one for getWeekdayName()
			$value = $wgContLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgContLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgContLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgContLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1617
/**
 * initialise the magic variables (like CURRENTMONTHNAME)
 *
 * Rebuilds $this->mVariables from scratch: for every id in
 * $wgVariableIDs the matching MagicWord adds its entry, with the value
 * supplied by getVariableValue().
 *
 * @access private
 */
function initialiseVariables() {
	global $wgVariableIDs;

	$fname = 'Parser::initialiseVariables';
	wfProfileIn( $fname );

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $id ) {
		$mw =& MagicWord::get( $id );
		$value = $this->getVariableValue( $id );
		$mw->addToArray( $this->mVariables, $value );
	}

	wfProfileOut( $fname );
}
1634
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Three regex passes run in order, each through a global callback
 * function: {{variable}}, then {{{argument}}} (only when $mOutputType is
 * OT_HTML or OT_WIKI), then {{template|...}}. Text longer than
 * MAX_INCLUDE_SIZE is returned untouched.
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string the transformed text
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
	global $wgLang, $wgScript, $wgArticlePath;

	# Prevent too big inclusions
	if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();
	$variableRegex = '/{{([' . $titleChars . ']*?)}}/';
	$argumentRegex = '/{{{([' . $titleChars . ']*?)}}}/';
	$templateRegex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	# Variable substitution
	$text = preg_replace_callback( $variableRegex, 'wfVariableSubstitution', $text );

	# Argument substitution
	if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
		$text = preg_replace_callback( $argumentRegex, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$text = preg_replace_callback( $templateRegex, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1683
/**
 * Replace magic variables
 *
 * @param array $matches regex match: [0] is the whole {{...}} text,
 *                       [1] the name between the braces
 * @return string the variable's value, or the original text if the name
 *                is not a known variable (or is not being substituted)
 * @access private
 */
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	if ( $this->mOutputType == OT_WIKI ) {
		# Do only magic variables prefixed by SUBST
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( !$mwSubst->matchStartAndRemove( $matches[1] ) ) {
			return $matches[0];
		}
		# Note that if we don't substitute the variable below,
		# we don't remove the {{subst:}} magic word, in case
		# it is a template rather than a magic variable.
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1710
# Split template arguments
#
# $argsString is the raw argument part of a template call including its
# leading '|', or '' if there are no arguments. Returns the list of
# arguments; a '|' that sits inside an unclosed [[link is kept as part of
# the argument instead of acting as a separator.
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );

	# Walk the pieces, gluing each one onto the argument being built for
	# as long as that argument has differing numbers of '[[' and ']]'.
	$args = array();
	$current = array_shift( $pieces );
	foreach ( $pieces as $piece ) {
		if ( substr_count( $current, '[[' ) != substr_count( $current, ']]' ) ) {
			$current .= '|' . $piece;
		} else {
			$args[] = $current;
			$current = $piece;
		}
	}
	$args[] = $current;

	return $args;
}
1735
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * Besides real templates this also handles the {{subst:}}, {{msg:}},
 * {{msgnw:}}, {{int:}}, {{ns:}}, {{localurl:}}, {{localurle:}} and
 * {{grammar:}} forms, tried in that order.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete matched text
 *  $matches[1]: a newline or '{' directly before the braces, if any
 *  $matches[2]: the title, i.e. the part before the |
 *  $matches[3]: the parameters (including a leading |), if any
 * @return string the text of the template, or the unchanged matched text
 *                if nothing could be substituted
 * @access private
 */
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgContLang;
	$fname = 'Parser::braceSubstitution';
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# Need to know if the template comes at the start of a line,
	# to treat the beginning of the template like the beginning
	# of a line for tables and block-level elements.
	$linestart = $matches[1];

	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |

	$args = $this->getTemplateArgs($matches[3]);
	$argc = count( $args );

	# Don't parse {{{}}} because that's only for template arguments
	if ( $linestart === '{' ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
			# One of two possibilities is true:
			# 1) Found SUBST but not in the PST phase
			# 2) Didn't find SUBST and in the PST phase
			# In either case, return without further processing
			$text = $matches[0];
			$found = true;
			$noparse = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = $linestart . wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $linestart . $wgContLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $linestart . $title->$func( $args[0] );
				} else {
					$text = $linestart . $title->$func();
				}
				$found = true;
			}
		}
	}

	# GRAMMAR
	if ( !$found && $argc == 1 ) {
		$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
		if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
			$text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
			$found = true;
		}
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# set $text to cached message.
		$text = $linestart . $this->mTemplates[$part1];
		$found = true;

		# Infinite loop test
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
			$found = true;
			$text .= '<!-- WARNING: template loop detected -->';
		}
	}

	# Load from database
	$itcamefromthedatabase = false;
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# maybeDoSubpageLink() takes its second argument by reference, so
		# it must be given a real variable for the result to be visible here
		$subpage = '';
		$part1 = $this->maybeDoSubpageLink( $part1, $subpage );
		if ($subpage !== '') {
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
					$itcamefromthedatabase = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[['.$title->getPrefixedText().']]';
				$found = true;
			}

			if ( $found ) {
				# Template cache array insertion. The cached copy must not
				# contain this call's $linestart, because the cache-hit
				# branch above prepends the caller's own.
				$this->mTemplates[$part1] = $text;
				$text = $linestart . $text;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored under
		# their name, all others under consecutive numbers starting at 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				if ( $value === false ) {
					$value = '';
				}
				if ( $name !== false ) {
					$assocArgs[$name] = $value;
				}
			}
		}

		# Add a new element to the templace recursion path
		$this->mTemplatePath[$part1] = 1;

		$text = $this->strip( $text, $this->mStripState );
		$text = $this->removeHTMLtags( $text );
		$text = $this->replaceVariables( $text, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
			$wgLinkCache->addLinkObj( $title );
		}

		# If the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
			$text = "\n" . $text;
		}
	}

	# Empties the template path
	$this->mTemplatePath = array();
	if ( !$found ) {
		return $matches[0];
	}

	# replace ==section headers==
	# XXX this needs to go away once we have a better parser.
	if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
		if( !is_null( $title ) )
			$encodedname = base64_encode($title->getPrefixedDBkey());
		else
			$encodedname = base64_encode("");
		# With the delimiters captured, even indexes hold ordinary text and
		# odd indexes hold the heading lines
		$m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
			PREG_SPLIT_DELIM_CAPTURE);
		$text = '';
		$nsec = 0;
		for( $i = 0; $i < count($m); $i += 2 ) {
			$text .= $m[$i];
			if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
			$hl = $m[$i + 1];
			# Headings that already carry a marker are left alone
			if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
				$text .= $hl;
				continue;
			}
			preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
			$text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
				. $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];

			$nsec++;
		}
	}

	return $text;
}
2000
/**
 * Triple brace replacement -- used for template arguments
 *
 * @param array $matches regex match: [0] is the whole {{{...}}} text,
 *                       [1] the argument name between the braces
 * @return string the value of the argument from the top of the argument
 *                stack, or the original text if no such argument was passed
 * @access private
 */
function argSubstitution( $matches ) {
	$key = trim( $matches[1] );
	$currentArgs = end( $this->mArgStack );

	return array_key_exists( $key, $currentArgs )
		? $currentArgs[$key]
		: $matches[0];
}
2016
/**
 * Count one more inclusion of the given entity and report whether it is
 * still within MAX_INCLUDE_REPEAT. The counter is incremented even when
 * the limit has been exceeded.
 *
 * @param string $dbk key identifying the included entity
 * @return bool true if the function is allowed to include this entity
 * @access private
 */
function incrementIncludeCount( $dbk ) {
	$previous = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk]
		: 0;
	$this->mIncludeCount[$dbk] = $previous + 1;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
2031
2032
/**
 * Cleans up HTML: strips HTML comments, keeps only whitelisted tags
 * (with their attributes passed through fixTagAttributes()) and escapes
 * every other '<' / '>' so it is shown literally.
 *
 * When $wgUserHtml is off the whitelist is empty, so every tag is escaped.
 * When $wgUseTidy is off this function also tracks open tags itself:
 * mismatched closing tags, table cells outside a table and illegally
 * nested tags are escaped, and tags left open are closed at the end.
 *
 * @param string $text text to clean
 * @return string cleaned text
 * @access private
 */
function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	# The whitelists stay empty unless user HTML is enabled
	$htmlpairs = array();  # Tags that must be closed
	$htmlsingle = array(); # Tags that need no closing tag
	$htmlnest = array();   # Tags that can be nested--??
	$tabletags = array();  # Can only appear inside table
	if ( $wgUserHtml ) {
		$htmlpairs = array(
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array(
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array(
			'td', 'th', 'tr'
		);
	}

	# Table cell/row tags are treated like single tags by the stack logic
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# Result is not used below; the call is kept as in the original
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = $this->removeHTMLcomments( $text );

	# Every bit after the first starts right behind a '<'
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';
	$bits = explode( '<', $text );
	$text = array_shift( $bits );

	if ( $wgUseTidy ) {
		# this might be possible using tidy itself
		foreach ( $bits as $bit ) {
			preg_match( $tagPattern, $bit, $m );
			@list( $whole, $slash, $t, $params, $brace, $rest ) = $m;
			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '&lt;' . str_replace( '>', '&gt;', $bit);
			}
		}
	} else {
		$tagstack = array();
		$tablestack = array();
		foreach ( $bits as $bit ) {
			# A bit that is not a tag leaves $m empty; silence the
			# notices the list() assignment would raise in that case
			$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
			preg_match( $tagPattern, $bit, $m );
			list( $whole, $slash, $t, $params, $brace, $rest ) = $m;
			error_reporting( $prev );

			$t = strtolower( $t );
			$accepted = false;
			if ( in_array( $t, $htmlelements ) ) {
				$badtag = false;
				$isSingle = in_array( $t, $htmlsingle );
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( !$isSingle &&
						( $popped = @array_pop( $tagstack ) ) != $t ) {
						# Not the innermost open tag: restore the stack
						@array_push( $tagstack, $popped );
						$badtag = true;
					} else {
						if ( $t == 'table' ) {
							# Back to the stack saved when the table was opened
							$tagstack = array_pop( $tablestack );
						}
						$newparams = '';
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
						!in_array( 'table', $tagstack ) ) {
						$badtag = true;
					} else if ( in_array( $t, $tagstack ) &&
						!in_array ( $t , $htmlnest ) ) {
						$badtag = true;
					} else if ( !$isSingle ) {
						if ( $t == 'table' ) {
							# Each table gets a fresh tag stack
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);
				}
				$accepted = !$badtag;
			}

			if ( $accepted ) {
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '&lt;' . str_replace( '>', '&gt;', $bit);
			}
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $open = array_pop( $tagstack ) ) ) {
			$text .= "</$open>\n";
			if ( $open == 'table' ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2152
/**
 * Strip every terminated HTML comment ('<!--' through '-->') from the text.
 *
 * A comment that sits alone between two newlines (spaces around it are
 * ignored) is removed together with those spaces and one of the newlines,
 * so that no blank line is left behind. Processing stops at the first
 * comment that is never closed.
 *
 * @param string $text
 * @return string text without the comments
 * @access private
 */
function removeHTMLcomments( $text ) {
	$fname='Parser::removeHTMLcomments';
	wfProfileIn( $fname );
	for ( $open = strpos( $text, '<!--' ); $open !== false; $open = strpos( $text, '<!--' ) ) {
		$close = strpos( $text, '-->', $open + 4 );
		if ( $close === false ) {
			# Unterminated comment; bail out
			break;
		}
		# Move past the '-->' itself
		$close += 3;

		# Grow a window [$from, $from + $len) around the comment that
		# swallows the spaces on both sides
		$from = max( $open - 1, 0 );
		$len = $close - $from;
		while ( $from > 0 && substr( $text, $from, 1 ) === ' ' ) {
			$from--;
			$len++;
		}
		while ( substr( $text, $from + $len, 1 ) === ' ' ) {
			$len++;
		}

		$newlineBefore = ( substr( $text, $from, 1 ) === "\n" );
		$newlineAfter = ( substr( $text, $from + $len, 1 ) === "\n" );
		if ( $newlineBefore && $newlineAfter ) {
			# Remove the comment, leading and trailing
			# spaces, and leave only one newline.
			$text = substr_replace( $text, "\n", $from, $len + 1 );
		} else {
			# Remove just the comment.
			$text = substr_replace( $text, '', $open, $close - $open );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2196
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who may edit and have enabled the option
 * 3) Add a table of contents for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * The TOC is dropped for __NOTOC__, for the main page and for pages with
 * fewer than 4 headlines; __TOC__ and __FORCETOC__ override those rules.
 *
 * @param string $text rendered HTML containing <h1>..<h6> headlines
 * @param bool $isMain when false the TOC is not inserted before the first headline
 * @return string the text with rewritten headlines (and TOC, if any)
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	# $wgLinkHolders is read by the /e replacement further down
	global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, [2] = rest of the opening tag, [3] = headline HTML
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();          # rebuilt headline HTML, indexed by headline number
	$sublevelCount = array(); # headlines seen so far per heading level
	$refers = array();        # canonized headline => number of occurrences
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		# A headline coming from a template carries a marker comment with
		# the base64-encoded template title and section index
		if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
			$istemplate = 1;
			$templatetitle = base64_decode($mat[1]);
			$templatesection = 1 + (int)base64_decode($mat[2]);
			$headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level]++;
		} else {
			$sublevelCount[$level] = 1;
		}
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels up to this one: "2.1.3"
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		# The second pass must work on the result of the first one; feeding it
		# $headline again would throw the unstrip() result away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# NOTE(review): the /e modifier evaluates the replacement as PHP code.
		# Only digits can be captured here, but /e is deprecated since
		# PHP 5.5 and removed in PHP 7.0; move to preg_replace_callback()
		# once this file no longer has to run on older PHP versions.
		$canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
			"\$wgLinkHolders['texts'][\$1]",
			$canonized_headline );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Keep ':' readable and turn the remaining %XX escapes into .XX
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline]++;
		} else {
			$refers[$canonized_headline] = 1;
		}
		$occurrence = $refers[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section;
		# repeated headlines get a "_2", "_3", ... suffix
		$anchor = $canonized_headline;
		if( $occurrence > 1 ) {
			$anchor .= '_' . $occurrence;
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		# Assemble the replacement HTML for this headline
		$headHtml = '';
		if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
			if( $istemplate ) {
				$headHtml .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
			} else {
				$headHtml .= $sk->editSectionLink($sectionCount+1);
			}
		}

		# Add the edit section span
		if( $rightClickHack ) {
			if( $istemplate ) {
				$headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
			} else {
				$headline = $sk->editSectionScript($sectionCount+1,$headline);
			}
		}

		# give headline the correct <h#> tag
		$headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
		$head[$headlineCount] = $headHtml;

		$headlineCount++;
		if( !$istemplate ) {
			$sectionCount++;
		}
	}

	if( $doShowToc ) {
		# Close the indentation levels still open and wrap the TOC
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text used to be added here
		# ($sk->editSectionLink(0)); it is disabled because it broke block
		# formatting, for example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# Put the TOC where the __TOC__ magic word stands
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2438
/**
 * Turn every "ISBN 123456" in the text into an HTML link to
 * Special:Booksources.
 *
 * The number is the run of digits, dashes and upper-case letters that
 * follows "ISBN " (after optional extra spaces); dashes are dropped for
 * the isbn= URL parameter but kept in the link text. An "ISBN " that is
 * not followed by such a run is left untouched.
 *
 * @param string $text text to be processed
 * @return string processed text
 * @access private
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The delimiter is a plain string, so explode() does the job of the
	# regex-based split(). The extra leading space is stripped off again below.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Set the leading spaces aside. strspn() copes with an empty $x,
		# which reading $x{0} in a loop did not.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );
		if ( $x == '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Take the longest prefix made of valid ISBN characters
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( '' == $num ) {
			# Nothing usable: put back what was taken apart
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2486
/**
 * Turn every "GEO ..." coordinate in the text into an HTML link to
 * Special:Geo.
 *
 * Coordinates written as  D&deg;M'S" North|South, D&deg;M'S" East|West
 * are first rewritten to the "(GEO lat:long)" notation. After that the
 * run of digits and ".+-:" following "GEO " is taken as the coordinate;
 * it must contain a ':' to be linked, and '+' signs are dropped for the
 * coordinates= URL parameter.
 *
 * @param string $text text to be processed
 * @return string processed text
 * @access private
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This rewriting is only for the ~35000 U.S. Census Rambot pages...
	# North and East are positive, South and West negative. (South used
	# to be written with '+', which made it indistinguishable from North.)
	$latitudes = array( 'North' => '+', 'South' => '-' );
	$longitudes = array( 'East' => '+', 'West' => '-' );
	foreach ( $latitudes as $latName => $latSign ) {
		foreach ( $longitudes as $longName => $longSign ) {
			$text = preg_replace(
				"/(\d+)&deg;(\d+)'(\d+)\" {$latName}, (\d+)&deg;(\d+)'(\d+)\" {$longName}/",
				"(GEO {$latSign}\$1.\$2.\$3:{$longSign}\$4.\$5.\$6)",
				$text );
		}
	}

	# The delimiter is a plain string, so explode() does the job of the
	# regex-based split(). The extra leading space is stripped off again below.
	$a = explode( 'GEO ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Set the leading spaces aside. strspn() copes with an empty $x,
		# which reading $x{0} in a loop did not.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Take the longest prefix made of valid coordinate characters
		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( array( '+', ' ' ), '', $geo );

		if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			# Not a lat:long pair: put back what was taken apart
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2537
/**
 * Turn every "RFC 1234" in the text into an external HTML link built
 * from the 'rfcurl' message ($1 is replaced by the RFC number).
 *
 * An "RFC " directly preceded by "[[" is taken to be part of an internal
 * link and is left as plain text.
 *
 * @param string $text text to be processed
 * @return string processed text
 * @access private
 */
function magicRFC( $text ) {
	$valid = '0123456789';
	$internal = false;

	# The delimiter is a plain string, so explode() does the job of the
	# regex-based split(). The extra leading space is stripped off again below.
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );

	/* Check if the RFC keyword is preceded by [[.
	 * This test is made here because the array_shift above
	 * prevents it from being done in the foreach.
	 */
	if ( substr( $text, -2 ) == '[[' ) {
		$internal = true;
	}

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if ( $x == '' ) {
			$text .= 'RFC ';
			continue;
		}

		/* remove and save whitespaces in $blank; strspn() copes with
		 * the token running out, which reading $x{0} in a loop did not */
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		/* remove and save the rfc number in $rfc */
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc == '' ) {
			/* call back stripped spaces */
			$text .= "RFC $blank$x";
		} elseif ( $internal ) {
			/* normal link */
			$text .= "RFC $rfc$x";
		} else {
			/* build the external link */
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = ( substr( $x, -2 ) == '[[' );
	}
	return $text;
}
2600
/**
 * Transform wiki markup when saving a page: convert \r\n to \n, then
 * run the pre-save pass (pstPass2) on the text with the stripped
 * sections protected.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;
	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line endings.
	# (A regex pass that rewrote <br> variants used to follow here; it is disabled.)
	$text = str_replace( "\r\n", "\n", $text );

	# Strip, transform, then put the stripped sections back
	$state = false;
	$text = $this->strip( $text, $state, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $state );
	return $this->unstripNoWiki( $text, $state );
}
2641
/**
 * Pre-save transform helper function. In this order it:
 *  - replaces variables ({{subst:...}} only, see below),
 *  - expands signatures: ~~~~~ to the date, ~~~~ to user link plus date,
 *    ~~~ to the user link,
 *  - completes context links such as [[|name]] and [[name (context)|]],
 *  - trims trailing whitespace and removes the MAG_END magic word.
 *
 * @param string $text the text to transform
 * @param User &$user the user whose name and nickname go into signatures
 * @return string the transformed text
 * @access private
 */
function pstPass2( $text, &$user ) {
	global $wgContLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) {
		# No nickname set: sign with the user name
		$k = $n;
	}

	/* Note: this is an ugly timezone hack for the European wikis */
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the zone saved above (a misspelt variable name used to
		# leave TZ set to the empty string here)
		putenv( 'TZ=' . $oldtz );
	}

	# Longest run of tildes first. Plain string replacement keeps a
	# nickname containing "$1" or "\1" from being read as a backreference.
	$userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised part of the current title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2705
/**
 * Prepare the parser for use from outside parse(): store the title,
 * the options and the output type, and optionally reset the state, so
 * that an external function can call class members with confidence.
 *
 * @param Title &$title title to parse for
 * @param ParserOptions $options parsing options
 * @param int $outputType one of the OT_* constants
 * @param bool $clearState whether to clear the parser state
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2719
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * The call is not re-entrant: while one transformation is running, a
 * nested call returns its text unchanged.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion: only the outermost call does the work
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
2747
/**
 * Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
 * The callback is stored in $this->mTagHooks under the tag name;
 * presumably it is later called with the text within the tag and
 * returns the transformed text.
 *
 * @param string $tag tag name
 * @param mixed $callback callback to register for the tag
 * @return mixed the callback registered before, or null if there was none
 * @access public
 */
function setHook( $tag, $callback ) {
	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2759 }
2760
/**
 * Result of a parse: the output text together with the language links,
 * the category links, the "contains old magic" flag and a cache time.
 *
 * The setters go through wfSetVar() and return its result
 * (presumably the previous value -- confirm against wfSetVar).
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	/**
	 * @param string $text output text
	 * @param array $languageLinks language links
	 * @param array $categoryLinks category links
	 * @param bool $containsOldMagic "contains old magic" flag
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
	}

	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	/**
	 * Fold another output into this one: both link lists are
	 * concatenated and the "contains old magic" flags are OR-ed.
	 * Text and cache time are left alone.
	 *
	 * @param ParserOutput $other output to merge in
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Take the categories of $other (not this object's language links)
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2798
/**
 * Options of the Parser. The values are filled in from global settings
 * and from a user's preferences, see initialiseFromUser().
 *
 * The setters go through wfSetVar() and return its result
 * (presumably the previous value -- confirm against wfSetVar).
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors
	function getUseTeX() {
		return $this->mUseTeX;
	}
	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function getSkin() {
		return $this->mSkin;
	}
	function getDateFormat() {
		return $this->mDateFormat;
	}
	function getEditSection() {
		return $this->mEditSection;
	}
	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function getShowToc() {
		return $this->mShowToc;
	}

	# Mutators
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Get parser options: build a new object initialised from the given user
	/* static */ function newFromUser( &$user ) {
		$popts = new ParserOptions;
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Get user options: site-wide settings come from the globals, the rest
	# from the user's skin and preferences. A false/empty $userInput is
	# replaced by a fresh User object marked as loaded.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$fname = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $fname );
		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Fetching the skin is profiled on its own
		wfProfileIn( $fname.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $fname.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
		wfProfileOut( $fname );
	}

}
2878
2879 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
	# Hand the regex match over to the parser currently at work
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2884
function wfArgSubstitution( $matches ) {
	# Hand the regex match over to the parser currently at work
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2889
function wfVariableSubstitution( $matches ) {
	# Hand the regex match over to the parser currently at work
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2894
/**
 * Return the total number of articles, i.e. the value of
 * $wgNumberOfArticles after wfLoadSiteStats() has been run.
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();
	return $GLOBALS['wgNumberOfArticles'];
}
2904
/**
 * Get various statistics from the database: fills $wgTotalViews,
 * $wgTotalEdits and $wgNumberOfArticles from row 1 of the site_stats
 * table. Nothing is done when $wgNumberOfArticles is no longer -1, and
 * the globals are left alone when the row cannot be read.
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	# -1 marks "not loaded yet"
	if ( -1 == $wgNumberOfArticles ) {
		$dbr =& wfGetDB( DB_SLAVE );
		$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
		$row = $dbr->getArray( 'site_stats', $fields,
			array( 'ss_row_id' => 1 ), 'wfLoadSiteStats' );

		if ( $row !== false ) {
			$wgTotalViews = $row->ss_total_views;
			$wgTotalEdits = $row->ss_total_edits;
			$wgNumberOfArticles = $row->ss_good_articles;
		}
	}
}
2928
function wfEscapeHTMLTagsOnly( $in ) {
	# Escape only the double quote and the angle brackets; '&' is left alone
	$raw = array( '"', '>', '<' );
	$escaped = array( '&quot;', '&gt;', '&lt;' );
	return str_replace( $raw, $escaped, $in );
}
2935
2936 ?>