minor bug fix
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Processes wiki markup
8 #
9 # There are two main entry points into the Parser class:
10 # parse()
11 # produces HTML output
12 # preSaveTransform().
13 # produces altered wiki markup.
14 #
15 # Globals used:
16 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
17 #
18 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
19 #
20 # settings:
21 # $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
22 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
23 # $wgLocaltimezone
24 #
25 # * only within ParserOptions
26 #
27 #----------------------------------------
28 # Variable substitution O(N^2) attack
29 #-----------------------------------------
30 # Without countermeasures, it would be possible to attack the parser by saving
31 # a page filled with a large number of inclusions of large pages. The size of
32 # the generated page would be proportional to the square of the input size.
33 # Hence, we limit the number of inclusions of any given page, thus bringing any
34 # attack back to O(N).
# Limits used to bound template inclusion.
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name understood by extractTags(): when passed as $tag,
# <!-- HTML comments --> are extracted instead of <XML>-style tags.
# It should not be anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the unique markers that stand in for stripped content.
define( 'UNIQ_PREFIX', 'NaodW29' );

# Building blocks for external link processing.
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );

# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );

# Same idea, but a space is allowed
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );

define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );

# [url optional text]
#   group 1: the whole URL, group 2: its protocol, group 3: the link text
define( 'EXT_LINK_BRACKETED',
	'/\[((' . URL_PROTOCOLS . '):' .
	EXT_LINK_URL_CLASS . '+) *(' .
	EXT_LINK_TEXT_CLASS . '*?)\]/S'
);

# Bare URL that points at an image file.
# NOTE(review): the first group expands to (http|https:), i.e. "http" OR
# "https:"; it still matches ordinary URLs because the URL class that follows
# accepts ':'. Confirm how callers use the groups before tightening it.
define( 'EXT_IMAGE_REGEX',
	'/^(' . HTTP_PROTOCOLS . ':)' .                                          # Protocol
	'(' . EXT_LINK_URL_CLASS . '+)\\/' .                                     # Hostname and path
	'(' . EXT_IMAGE_FNAME_CLASS . '+)\\.((?i)' . EXT_IMAGE_EXTENSIONS . ')$/S' # Filename
);
68
69 class Parser
70 {
71 # Persistent:
72 var $mTagHooks;
73
74 # Cleared with clearState():
75 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
76 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
77
78 # Temporary:
79 var $mOptions, $mTitle, $mOutputType,
80 $mTemplates, // cache of already loaded templates, avoids
81 // multiple SQL queries for the same string
82 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
83 // in this path. Used for loop detection.
84
# Constructor (PHP 4 style: named after the class).
# Initialises the members that persist across parses to empty arrays and
# then resets the per-parse members through clearState().
function Parser() {
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	$this->clearState();
}
91
# Reset every member that holds state for a single parse:
# a fresh ParserOutput, zeroed counters/flags and empty arrays.
# NOTE(review): $this->categoryMagicDone (set in internalParse()) is not
# reset here - confirm whether that is intended.
function clearState() {
	# Scalars and flags
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;

	# Collections
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();

	# Output container
	$this->mOutput = new ParserOutput;
}
103
104 # First pass--just handle <nowiki> sections, pass the rest off
105 # to internalParse() which does all the real work.
106 #
107 # Returns a ParserOutput
108 #
# Render wikitext and return the parser's output object.
#
# $text        wikitext to render
# $title       title object of the page; stored by reference in $this->mTitle
# $options     options object; stored in $this->mOptions
# $linestart   forwarded to internalParse() and doBlockLevels()
# $clearState  when true, clearState() is called before anything else
#
# Returns $this->mOutput, after setText() has been called with the result.
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels.
	# The space-related patterns use lookaheads and therefore only replace the
	# space itself; no back-reference to the following character is needed.
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/ (?=\\?|:|;|!|\\302\\273)/' => '&nbsp;',
			# thin out digit groups: "1 000" keeps together
			'/(\d) (?=\d{3}\D)/' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&nbsp;',
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# Hex entities accept only real hex digits ([0-9A-Fa-f]).
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# Tidy repairs <hr>, <br> and stray ampersands itself.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (?=\\?|:|;|!|\\302\\273)/' => '&nbsp;',
			'/(\d) (?=\d{3}\D)/' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&nbsp;',
			# wrap words containing the entities &#x301; &#x303; &#x309;
			'/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
167
# Returns a lower-case hexadecimal string made of two mt_rand() draws,
# each in the range 0 .. 0x7fffffff, rendered with dechex() and concatenated.
/* static */ function getRandomString() {
	$upper = 0x7fffffff;
	$head = dechex( mt_rand( 0, $upper ) );
	$tail = dechex( mt_rand( 0, $upper ) );
	return $head . $tail;
}
171
172 # Replaces all occurrences of <$tag>content</$tag> in the text
173 # with a random marker and returns the new text. the output parameter
174 # $content will be an associative array filled with data on the form
175 # $unique_marker => content.
176
177 # If $content is already set, the additional entries will be appended
178
179 # If $tag is set to STRIP_COMMENTS, the function will extract
180 # <!-- HTML comments -->
181
# Replace every <$tag>content</$tag> in $text with a unique marker.
#
# $tag          tag name, or STRIP_COMMENTS to extract <!-- HTML comments -->
# $text         text to scan
# $content      in/out: receives marker => extracted content; existing
#               entries are kept and new ones appended
# $uniq_prefix  string put in front of every generated marker
#
# Returns the text with the extracted regions replaced by their markers.
# An opening tag with no closing tag swallows the rest of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters do not change while scanning, so build them once.
	if ( $tag === STRIP_COMMENTS ) {
		$open = '/<!--/i';
		$close = '/-->/i';
	} else {
		# Quote the name so it is always matched literally.
		$quoted = preg_quote( $tag, '/' );
		$open = "/<\\s*$quoted\\s*>/i";
		$close = "/<\\/\\s*$quoted\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $open, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No (further) opening tag, or nothing after it
			$text = '';
		} else {
			$q = preg_split( $close, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# $q[1] is missing when the tag is never closed
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
213
214 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
215 # If $render is set, performs necessary rendering operations on plugins
216 # Returns the text, and fills an array with data needed in unstrip()
217 # If the $state is already a valid strip state, it adds to the state
218
219 # When $stripcomments is set, HTML comments <!-- like this -->
220 # will be stripped in addition to other tags. This is important
221 # for section editing, where these comments cause confusion when
222 # counting the sections in the wikisource
# Pull <html> (only if $wgRawHtml), <nowiki>, <math>, <pre>, optionally
# HTML comments, and every tag registered in $this->mTagHooks out of $text,
# replacing each with a unique marker.
#
# $text           text to process
# $state          in/out strip state: tag name => ( marker => replacement ).
#                 An existing state is extended, otherwise a new one is built.
# $stripcomments  when true, <!-- comments --> are stripped as well
#
# When $this->mOutputType is OT_HTML the stored replacement is the rendered
# form; otherwise it is the original tag and content, so the wikitext can be
# restored unchanged.
#
# Returns the text containing the markers.
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml;
	if( $wgRawHtml ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		foreach( $html_content as $marker => $content ) {
			if ($render ) {
				# Raw and unchecked for validity.
				$html_content[$marker] = $content;
			} else {
				$html_content[$marker] = "<html>$content</html>";
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the tag literally, with a proper closing tag
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$builtin_content = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		# A pre-existing state need not contain every key (for instance when
		# it was created by insertStripItem()), so missing keys are created
		# instead of being read or skipped.
		foreach ( array( $builtin_content, $ext_content ) as $group ) {
			foreach ( $group as $tag => $array ) {
				if ( isset( $state[$tag] ) && is_array( $state[$tag] ) ) {
					$state[$tag] = $state[$tag] + $array;
				} else {
					$state[$tag] = $array;
				}
			}
		}
	} else {
		$state = $builtin_content + $ext_content;
	}
	return $text;
}
329
330 # always call unstripNoWiki() after this one
# Put stripped content back into $text for every tag in $state except
# 'nowiki' and 'html' (those are restored by unstripNoWiki(), which must be
# called after this method).
#
# $text   text containing strip markers
# $state  strip state: tag name => ( marker => replacement )
#
# Both the tags and the entries of each tag are expanded in reverse order,
# otherwise nested tags would be corrupted. Entries are walked with foreach,
# so a replacement that is false or empty does not cut the expansion short,
# and the internal array pointer of the caller's state is left untouched.
#
# Returns the text with the markers replaced.
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		# Nothing was stripped
		return $text;
	}

	foreach ( array_reverse( $state, true ) as $tag => $contentDict ) {
		if ( $tag === 'nowiki' || $tag === 'html' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( (string)$marker, (string)$content, $text );
		}
	}

	return $text;
}
344 # always call this after unstrip() to preserve the order
# Restore the 'nowiki' entries of the strip state and, when $wgRawHtml is
# set, the 'html' entries as well. Call this after unstrip() to preserve
# the order.
#
# $text   text containing strip markers
# $state  strip state: tag name => ( marker => replacement )
#
# Entries are expanded in reverse order, otherwise nested tags would be
# corrupted. A missing 'nowiki' or 'html' key is treated as "nothing to
# restore".
#
# Returns the text with the markers replaced.
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	$tags = array( 'nowiki' );
	if ( $wgRawHtml ) {
		$tags[] = 'html';
	}

	foreach ( $tags as $tag ) {
		if ( !isset( $state[$tag] ) || !is_array( $state[$tag] ) ) {
			continue;
		}
		foreach ( array_reverse( $state[$tag], true ) as $marker => $content ) {
			$text = str_replace( (string)$marker, (string)$content, $text );
		}
	}

	return $text;
}
360
361 # Add an item to the strip state
362 # Returns the unique tag which must be inserted into the stripped text
363 # The tag will be replaced with the original text in unstrip()
# Add an item to the strip state.
#
# $text   content to store
# $state  in/out strip state; when empty it is created with every key that
#         strip() reads when merging ('html', 'nowiki', 'math', 'pre',
#         'comment') plus the 'item' bucket used here
#
# Returns the unique marker which must be inserted into the stripped text.
# The marker is replaced with $text again in unstrip().
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
377
378 # categoryMagic
379 # generate a list of subcategories and pages for a category
380 # depending on wfMsg("usenewcategorypage") it either calls the new
381 # or the old code. The new code will not work properly for some
382 # languages due to sorting issues, so they might want to turn it
383 # off.
# Dispatch to the old or the new category listing code.
# The old code is used when the first character of the
# 'usenewcategorypage' message compares equal to '0'.
function categoryMagic() {
	$msg = wfMsg('usenewcategorypage');
	$useOldCode = ( '0' == @$msg[0] );
	return $useOldCode
		? $this->oldCategoryMagic()
		: $this->newCategoryMagic();
}
393
394 # This method generates the list of subcategories and pages for a category
# Build the plain (comma separated) list of subcategories and pages for the
# category page held in $this->mTitle.
#
# Returns an empty string when category magic is disabled in the parser
# options or when the title is not in the category namespace; otherwise the
# HTML for the listing.
function oldCategoryMagic () {
	global $wgLang ;
	$fname = 'Parser::oldCategoryMagic';

	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
		"WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query( $sql, $fname ) ;

	# Read all rows first; the links are built afterwards
	while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		# Prefixed title: "Namespace:Title", or just "Title" in the main namespace
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == NS_CATEGORY ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	$dbr->freeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$r .= implode ( ', ' , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( 'category_header', $ti );
		$r .= "<h2>$h</h2>\n" ;
		$r .= implode ( ', ' , $articles ) ;
	}

	return $r ;
}
455
# Build the listing of subcategories and pages for the category page held in
# $this->mTitle, grouped under headings by the first character of the sort
# key. Lists with more than six entries are laid out in three columns.
#
# Returns an empty string when category magic is disabled in the parser
# options or when the title is not in the category namespace; otherwise the
# HTML for the listing.
function newCategoryMagic () {
	global $wgLang;
	$fname = 'Parser::newCategoryMagic';

	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
		"$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query ( $sql, $fname ) ;
	while ( $x = $dbr->fetchObject ( $res ) )
	{
		# Prefixed title: "Namespace:Title", or just "Title" in the main namespace
		$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == NS_CATEGORY ) {
			$ctitle = str_replace( '_',' ',$x->cur_title );
			array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

			// If there's a link from Category:A to Category:B, the sortkey of the resulting
			// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
			// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
			// else use sortkey...
			if ( ($ns.':'.$ctitle) == $x->cl_sortkey ) {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
			} else {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		} else {
			array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	$dbr->freeResult ( $res ) ;

	# Don't show subcategories section if there are none.
	$numchild = count( $children );
	if ( $numchild > 0 )
	{
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";

		if ( $numchild == 1 ) {
			$r .= wfMsg( 'subcategorycount1', 1 );
		} else {
			$r .= wfMsg( 'subcategorycount' , $numchild );
		}

		$r .= $this->formatCategoryList( $children, $children_start_char );
	}

	# The article section header and count are always shown
	$ti = $this->mTitle->getText() ;
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";

	$numart = count( $articles );
	if ( $numart == 1 ) {
		$r .= wfMsg( 'categoryarticlecount1', 1 );
	} else {
		$r .= wfMsg( 'categoryarticlecount' , $numart );
	}

	$r .= $this->formatCategoryList( $articles, $articles_start_char );

	return $r ;
}

# Render one list of links for newCategoryMagic().
#
# $items        list of link HTML strings, indexed 0..n-1
# $start_chars  heading character for each entry, same indexes as $items
#
# Returns '' for an empty list, a single <ul> sequence with <h3> headings for
# up to six entries, and a three column table for longer lists.
/* private */ function formatCategoryList( $items, $start_chars ) {
	$total = count( $items );
	if ( $total == 0 ) {
		return '';
	}

	$r = '';
	if ( $total > 6 ) {
		// divide list into three equal chunks
		$chunk = (int) ( $total / 3 );

		$r .= '<table width="100%"><tr valign="top">';

		// loop through the chunks
		for ( $startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
			$chunkIndex < 3;
			$chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1 )
		{
			$r .= '<td><ul>';
			for ( $index = $startChunk;
				$index < $endChunk && $index < $total;
				$index++ )
			{
				// New heading at the beginning of a chunk or when the starting
				// letter changes. The chunk test comes first so the previous
				// entry is only looked at when there is one.
				if ( $index == $startChunk
					|| $start_chars[$index] != $start_chars[$index - 1] )
				{
					$r .= "</ul><h3>{$start_chars[$index]}</h3>\n<ul>";
				}

				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul></td>';
		}
		$r .= '</tr></table>';
	} else {
		// short list: one column
		$r .= "<h3>{$start_chars[0]}</h3>\n";
		$r .= '<ul><li>' . $items[0] . '</li>';
		for ( $index = 1; $index < $total; $index++ )
		{
			if ( $start_chars[$index] != $start_chars[$index - 1] )
			{
				$r .= "</ul><h3>{$start_chars[$index]}</h3>\n<ul>";
			}

			$r .= "<li>{$items[$index]}</li>";
		}
		$r .= '</ul>';
	}
	return $r;
}
648
649 # Return allowed HTML attributes
# Returns the list of attribute names that fixTagAttributes() lets through
# (no scripting attributes). 'width' appears twice in the list.
function getHTMLattrs () {
	return array(
		# Generic
		'title', 'align', 'lang', 'dir', 'width', 'height',
		# BR, HR
		'bgcolor', 'clear', 'noshade',
		# BLOCKQUOTE, Q
		'cite',
		# FONT
		'size', 'face', 'color',
		# For various lists, mostly deprecated but safe
		'type', 'start', 'value', 'compact',
		# Tables
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		# For CSS
		'id', 'class', 'name', 'style'
	);
}
665
666 # Remove non approved attributes and javascript in css
# Remove non approved attributes and javascript in css.
#
# $t  the attribute part of a tag, e.g. 'border="1" onclick=x'
#
# Every name[=value] pair whose lower-cased name is not listed by
# getHTMLattrs() is dropped. If the result still contains a style attribute
# with "expression", something ending in "tp://" / "tps://", or "url(",
# all attributes are dropped.
#
# Returns the cleaned, trimmed attribute string (possibly empty).
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier so attribute values are
	# never evaluated as PHP code.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	$t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

# preg_replace_callback() handler for fixTagAttributes().
# $matches[1] is the attribute name, $matches[3] (if present) its value.
# Returns "name", "name=value", or '' for attributes that are not allowed.
/* private */ function fixTagAttributesCallback( $matches ) {
	if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	$value = isset( $matches[3] ) ? $matches[3] : '';
	if ( $value !== '' ) {
		return $matches[1] . '=' . $value;
	}
	return $matches[1];
}
691
692 # interface with html tidy, used if $wgUseTidy = true
# Interface with html tidy, used if $wgUseTidy = true.
#
# $text  HTML fragment to clean up
#
# The fragment is wrapped in a minimal XHTML document, written to the stdin
# of "$wgTidyBin -config $wgTidyConf <options>" and tidy's stdout is
# collected; stderr is sent to /dev/null.
#
# Returns tidy's output. If nothing came back for non-empty input, the
# original text is returned with an HTML comment noting the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy so the encoding flag is not appended to the global
	# option string again on every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),              # tidy reads the document from here
		1 => array('pipe', 'w'),              # tidy writes the result here
		2 => array('file', '/dev/null', 'a')  # diagnostics are discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
739
740 # parse the wiki syntax used to render tables
# Parse the wiki syntax used to render tables.
#
# $t  text to process
#
# The text is handled line by line (each line is trimmed first):
#   {|   opens a table (leading colons indent it with <dl><dd>)
#   |}   closes the table
#   |-   starts a new row; the rest of the line holds the row attributes
#   |    data cells, !  header cells, |+  caption; "||" (and "!!" on header
#        lines) separates several cells on one line
# Lines outside any table are left alone. Tables still open at the end of
# the text are closed.
#
# Returns the text with the table markup converted to HTML.
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	# One stack entry per open table:
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( "<dl><dd>", $indent_level ) .
				"<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			# The row attributes are emitted when the first cell opens the <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need an open row; captions do not
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that actually opened it (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</' . $l . '>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
837
838 # Parses the text and adds the result to the strip state
839 # Returns the strip tag
# Strip and parse $text as a non-main fragment, store the result in the
# strip state and return $newline followed by the marker that stands for it.
#
# $text     wikitext fragment
# $newline  string put in front of the returned marker; its boolean value is
#           passed to internalParse() as $linestart
# $args     passed through to internalParse()
function stripParse( $text, $newline, $args ) {
	$stripped = $this->strip( $text, $this->mStripState );
	$atLineStart = (bool)$newline;
	$parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
845
# Run the main chain of wikitext transformations on already stripped text.
#
# $text       stripped wikitext
# $linestart  accepted but not referenced in this method
# $args       passed to replaceVariables()
# $isMain     passed to formatHeadings(); the category listing is only
#             appended when this is true
#
# Returns the transformed text. The order of the steps below matters.
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# HTML clean-up first, then variable replacement
	$text = $this->replaceVariables( $this->removeHTMLtags( $text ), $args );

	# Four or more dashes at the start of a line become a rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );

	# Internal links are replaced in two consecutive passes.
	# NOTE(review): presumably the second pass handles links produced or
	# exposed by the first one - confirm before merging the calls.
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );

	# Put stripped content back before tables and headings are formatted
	$text = $this->unstrip( $text, $this->mStripState );
	$text = $this->unstripNoWiki( $text, $this->mStripState );

	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# Append the category listing once per parser object, main text only
	if ( $isMain && !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
882
# Apply the "magic link" replacements in order: ISBN, then GEO (only when
# $wgUseGeoMode is set and true), then RFC.
# $text is taken by reference and overwritten; the same variable is also
# returned by reference.
/* private */ function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
892
893 # Parse ^^ tokens and return html
# Parse ^^ tokens and return html.
#
# $text  text to process
#
# Each ^^...^^ pair becomes <small><sup>...</sup></small>. The match is
# non-greedy so that several pairs on one line are converted separately
# instead of being merged into one span. '.' does not match a newline, so a
# pair must open and close on the same line.
#
# Returns the converted text.
/* private */ function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
901
902 # Parse headers and return html
# Parse headers and return html.
# A line that starts with N '=' characters and contains N '=' characters
# followed by whitespace or the end of the line becomes <hN>...</hN>.
# Levels are tried from 6 down to 1 so the longest marker wins.
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	for ( $level = 6; $level >= 1; --$level ) {
		$marker = str_repeat( '=', $level );
		$pattern = "/^{$marker}(.+){$marker}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
914
# Run doQuotes() on every line of $text separately and join the results
# with newlines again.
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( $line );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
927
# Convert the apostrophe mark-up of a single line into HTML:
#   ''      toggles <em>
#   '''     toggles <strong>
#   '''''   toggles both at once
#
# $text: one line of wikitext (no "\n" handling is done here)
# Returns the line with the mark-up replaced by <em>/<strong> tags; tags
# still open at the end of the line are closed.  If the line contains no
# run of two or more apostrophes it is returned unchanged.
/* private */ function doQuotes( $text ) {
	# With PREG_SPLIT_DELIM_CAPTURE the even entries of $arr are plain
	# text and the odd entries are the apostrophe runs between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$count = count( $arr );
	if ( $count == 1 ) {
		return $text;
	}

	# First, do some preliminary work. This may shift some apostrophes from
	# being mark-up to being text. It also counts the number of occurrences
	# of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $count; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 4 ) {
			# If there are ever four apostrophes, assume the first is supposed to
			# be text, and the remaining three constitute mark-up for bold text.
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
			$len = 3;
		} else if ( $len > 5 ) {
			# If there are more than 5 apostrophes in a row, assume they're all
			# text except for the last 5.
			$arr[$i-1] .= str_repeat( "'", $len - 5 );
			$arr[$i] = "'''''";
			$len = 5;
		}
		# Count the mark-ups. A run of five counts as one italic plus one bold.
		if ( $len == 2 ) {
			$numitalics++;
		} else if ( $len == 3 ) {
			$numbold++;
		} else if ( $len == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $count; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			# Last and second-to-last character of the text before the run
			$x1 = substr( $arr[$i-1], -1 );
			$x2 = substr( $arr[$i-1], -2, 1 );
			if ( $x1 == " " ) {
				if ( $firstspace == -1 ) $firstspace = $i;
			} else if ( $x2 == " " ) {
				if ( $firstsingleletterword == -1 ) $firstsingleletterword = $i;
			} else {
				if ( $firstmultiletterword == -1 ) $firstmultiletterword = $i;
			}
		}

		# Preference order: after a single-letter word, then after a
		# multi-letter word, then directly after a space.
		# (notice that it is possible for all three to be -1 if, for example,
		# there is only one pentuple-apostrophe in the line)
		if ( $firstsingleletterword > -1 ) {
			$demote = $firstsingleletterword;
		} else if ( $firstmultiletterword > -1 ) {
			$demote = $firstmultiletterword;
		} else {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			# Turn ''' into a literal apostrophe followed by ''
			$arr[$demote] = "''";
			$arr[$demote-1] .= "'";
		}
	}

	# Now let's actually convert our apostrophic mush to HTML!
	# $state names the currently open tags in opening order ('', 'em',
	# 'strong', 'emstrong', 'strongem'), or 'both' after a ''''' whose
	# nesting order is not known yet; text seen while in 'both' is held
	# back in $buffer until the next mark-up decides the order.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state == 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$len = strlen( $r );
		if ( $len == 2 ) {
			if ( $state == 'em' )
			{ $output .= "</em>"; $state = ''; }
			else if ( $state == 'strongem' )
			{ $output .= "</em>"; $state = 'strong'; }
			else if ( $state == 'emstrong' )
			{ $output .= "</strong></em><strong>"; $state = 'strong'; }
			else if ( $state == 'both' )
			{ $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
			else # $state can be 'strong' or ''
			{ $output .= "<em>"; $state .= 'em'; }
		} else if ( $len == 3 ) {
			if ( $state == 'strong' )
			{ $output .= "</strong>"; $state = ''; }
			else if ( $state == 'strongem' )
			{ $output .= "</em></strong><em>"; $state = 'em'; }
			else if ( $state == 'emstrong' )
			{ $output .= "</strong>"; $state = 'em'; }
			else if ( $state == 'both' )
			{ $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
			else # $state can be 'em' or ''
			{ $output .= "<strong>"; $state .= 'strong'; }
		} else if ( $len == 5 ) {
			if ( $state == 'strong' )
			{ $output .= "</strong><em>"; $state = 'em'; }
			else if ( $state == 'em' )
			{ $output .= "</em><strong>"; $state = 'strong'; }
			else if ( $state == 'strongem' )
			{ $output .= "</em></strong>"; $state = ''; }
			else if ( $state == 'emstrong' )
			{ $output .= "</strong></em>"; $state = ''; }
			else if ( $state == 'both' )
			{ $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
			else # ($state == '')
			{ $buffer = ''; $state = 'both'; }
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	if ( $state == 'strong' || $state == 'emstrong' )
		$output .= '</strong>';
	if ( $state == 'em' || $state == 'strongem' || $state == 'emstrong' )
		$output .= '</em>';
	if ( $state == 'strongem' )
		$output .= '</strong>';
	# A lonely ''''' with nothing after it leaves an empty buffer; do not
	# emit an empty <strong><em></em></strong> pair for it.
	if ( $state == 'both' && $buffer !== '' )
		$output .= "<strong><em>{$buffer}</em></strong>";
	return $output;
}
1088
1089 # Note: we have to do external links before the internal ones,
1090 # and otherwise take great care in the order of things here, so
1091 # that we don't end up interpreting some URLs twice.
1092
# Replace bracketed external links, e.g. [http://domain.tld/some.link text],
# with <a> elements.  Text between the bracketed links is passed through
# replaceFreeExternalLinks() so bare URLs are linked as well.
#
# $text: wikitext to process
# Returns the text with external links converted to HTML.
/* private */ function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsg('linktrail');
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	# Everything before the first bracketed link
	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	# The remaining entries are consumed four at a time:
	# URL, protocol, link text, and the text following the link.
	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		$protocol = $bits[$pos + 1];
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label != '' ) {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		} else if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber if allowed
			$label = "[" . ++$this->mAutonumber . "]";
		} else {
			# Otherwise just use the URL
			$label = htmlspecialchars( $url );
		}

		$encUrl = htmlspecialchars( $url );

		# Bit in parentheses showing the URL for the printable version;
		# left empty when the link text already is the URL or when the
		# skin suppresses URL expansion.
		$paren = '';
		$labelIsUrl = ( $url == $label )
			|| preg_match( "!$protocol://" . preg_quote( $label, "/" ) . "/?$!", $url );
		if ( !$labelIsUrl && !$sk->suppressUrlExpansion() ) {
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $label );

		# Note that the href below is built from $url, not from $encUrl.
		# This means that users can paste URLs directly into the text
		# Funny characters like &ouml; aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= "<a href=\"{$url}\" {$la}>{$label}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
1166
1167 # Replace anything that looks like a URL with a link
# Link bare URLs (or turn them into images where allowed).
#
# $text: text that contains no bracketed external links
# Returns the text with every "protocol:..." occurrence that is followed
# by at least one URL character replaced by a link or an image.
function replaceFreeExternalLinks( $text ) {
	# Odd/even pairs after the first entry: protocol (with colon), rest
	$bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );

	$sk =& $this->mOptions->getSkin();

	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 2 ) {
		$protocol = $bits[$pos];
		$remainder = $bits[$pos + 1];

		if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Nothing URL-like follows the protocol; emit it untouched
			$s .= $protocol . $remainder;
			continue;
		}

		# Found some characters after the protocol that look promising
		$url = $protocol . $m[1];
		$trail = $m[2];

		# Move trailing punctuation to $trail
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}

		$numSepChars = strspn( strrev( $url ), $sep );
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		# Replace &amp; from obsolete syntax with &
		$url = str_replace( '&amp;', '&', $url );

		# Is this an external image?  If not, make a link.
		$html = $this->maybeMakeImageLink( $url );
		if ( $html === false ) {
			$html = $sk->makeExternalLink( $url, $url );
		}
		$s .= $html . $trail;
	}
	return $s;
}
1213
1214 # make an image if it's allowed
# $url: candidate image URL
# Returns the skin's image HTML for $url when external images are
# allowed by the parser options and $url matches EXT_IMAGE_REGEX;
# otherwise returns false.
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}
	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
1226
# The wikilinks [[ ]] are processed here.
# Replace [[...]] links in $s with the HTML produced by the skin.
#
# The text is split on '[[' and each piece is matched against the
# pattern "target|text]]trail".  Pieces that do not match, or whose
# target is not a valid title, are emitted unchanged (with their '[[').
# Language links and category links are recorded on the parser output
# instead of being rendered in place.
#
# $s: wikitext to process
# Returns the text with the internal links replaced.
/* private */ function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	$redirect = MagicWord::get ( MAG_REDIRECT ) ;

	# The leading space is added and removed again around the split
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( $fname.'-setup' );

	# start processing each piece that followed a '[['
	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		# Valid link forms:
		# Foobar -- normal
		# :Foobar -- override special treatment of prefix (images, language links)
		# /Foobar -- convert to CurrentPage/Foobar
		# /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text

		# Look at the first character
		$c = substr($m[1],0,1);
		$noforce = ($c != ':');

		# subpage
		if( $c == '/' ) {
			# / at end means we don't want the slash to be shown
			if(substr($m[1],-1,1)=='/') {
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}

			# Some namespaces don't allow subpages
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
				# subpages allowed here
				$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
				if( '' == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				# no subpage allowed, use standard link
				$link = $noslash;
			}

		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			# We don't want to keep the first character
			$link = substr( $m[1], 1 );
		}

		$wasblank = ( '' == $text );
		if( $wasblank ) $text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();

		# Link not escaped by : , create the various objects
		if( $noforce ) {

			# Interwikis
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}

			if ( $ns == NS_IMAGE ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$pPLC=$sk->postParseLinkColour();
				$sk->postParseLinkColour( false );
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$sk->postParseLinkColour( $pPLC );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}

		# strpos() returns 0 for a '#' in first position, so the result must
		# be compared with === to tell "found at offset 0" from "not found".
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		    ( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == NS_MEDIA ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == NS_SPECIAL ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1406
1407 # Some functions here used by doBlockLevels()
1408 #
# Close the section recorded in $this->mLastSection, if any.
# Returns the closing tag followed by a newline, or '' when no section
# was open.  Always resets mInPre and mLastSection.
/* private */ function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mInPre = false;
	$this->mLastSection = '';
	return $closing;
}
# getCommon() returns the length of the longest common prefix
# of both arguments, i.e. the number of leading characters they share.
#
# $st1, $st2: strings to compare
# Returns the number of leading characters that are identical in both
# strings (0 when either string is empty).
/* private */ function getCommon( $st1, $st2 ) {
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square-bracket offsets are used because the $str{$i} form is
	# not accepted by current PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
1431 # These next three functions open, continue, and close the list
1432 # element appropriate to the prefix character passed into them.
1433 #
# Open the list element that belongs to the prefix character $char
# ('*', '#', ':' or ';').  Any open paragraph is closed first.
# Returns the HTML to emit; for an unknown character only an error
# comment is returned.
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			# Remember that a <dt> is open so it gets the right closing tag
			$this->mDTopen = true;
			break;
		default:
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
1448
# Continue the list that belongs to the prefix character $char:
# returns the HTML that ends the current item and starts the next one.
# For definition lists the closing tag depends on whether a <dt> is
# currently open (mDTopen), and mDTopen is updated for the new item.
# An unknown character yields an error comment.
/* private */ function nextItem( $char ) {
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';
		case ':':
		case ';':
			$close = $this->mDTopen ? '</dt>' : '</dd>';
			if ( ';' == $char ) {
				$this->mDTopen = true;
				return $close . '<dt>';
			}
			$this->mDTopen = false;
			return $close . '<dd>';
	}
	return '<!-- ERR 2 -->';
}
1464
# Close the list that belongs to the prefix character $char
# ('*', '#' or ':').  Returns the closing tags followed by a newline;
# any other character (including ';') yields an error comment without
# a trailing newline.
/* private */ function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$text = '</li></ul>';
			break;
		case '#':
			$text = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				# The last item was a term, so a <dt> is what is still open
				$this->mDTopen = false;
				$text = '</dt></dl>';
			} else {
				$text = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $text."\n";
}
1479
# Build block-level structure (paragraphs, <pre> sections and lists)
# from the text, working line by line.
#
# $text:      text to process, lines separated by "\n"
# $linestart: when false, the first line is copied to the output as it
#             is and block processing starts with the second line
# Returns the text with the block-level HTML inserted.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or a paragraph tag whose output is postponed until
	# it is known what follows the current blank line
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ';' treated as ':'; it is what gets
			# remembered in $lastPrefix and passed to closeList()
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ':' );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than a direct offset: $t may be the empty
				# string here, and reading offset 0 of '' is an error.
				if ( " " == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# The line itself is only emitted when no paragraph tag is pending
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close whatever list levels the last line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1636
1637 # Return value of a magic variable (like PAGENAME)
# $index: one of the MAG_* magic word identifiers
# Returns the current value for that magic variable, or NULL when
# $index is not one of the variables handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_PAGENAMEE:
			$value = $this->mTitle->getPartialURL();
			break;
		case MAG_NAMESPACE:
			# The localised namespace name is used rather than
			# Namespace::getCanonicalName() (patch by Dori)
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') counts from 0; one is added before the lookup
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1673
1674 # initialise the magic variables (like CURRENTMONTHNAME)
# Rebuilds $this->mVariables from scratch: for every id listed in
# $wgVariableIDs the matching magic word registers itself in the array
# together with the value computed by getVariableValue().
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
	}
}
1683
# Expand {{...}} and {{{...}}} constructs in $text.
#
# $text: wikitext to process
# $args: arguments of the template currently being expanded; pushed on
#        mArgStack for the duration of the call
# Returns the expanded text.  Text longer than MAX_INCLUDE_SIZE is
# returned untouched.
/* private */ function replaceVariables( $text, $args = array() ) {
	global $wgLang, $wgScript, $wgArticlePath;

	# Prevent too big inclusions
	if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( '{', '}' ), '', $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The callbacks are plain functions, so the current parser is handed
	# to them through a global.
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$variablePattern = '/{{([' . $nonBraceChars . ']*?)}}/';
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

		# Argument substitution
		$argumentPattern = '/(\\n?){{{([' . $titleChars . ']*?)}}}/';
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1721
# Callback for {{VARIABLE}} matches.
# $matches: regex match; [0] is the whole construct, [1] the name
# Returns the variable's value when the name is a key of mVariables
# (and flags the output as containing old magic), otherwise the
# original text.
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
1734
1735 # Split template arguments
# $argsString: the argument part of a template call including its
#              leading '|', or '' when there are no arguments
# Returns the list of arguments.  The string is split on '|', but a
# piece whose '[[' and ']]' counts differ is glued to the following
# piece, because that '|' belongs to link syntax rather than to the
# template parameter syntax.
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );

	$args = array();
	$pending = null;
	foreach ( $pieces as $piece ) {
		# Extend the argument under construction, or start a new one
		$pending = ( $pending === null ) ? $piece : $pending . "|" . $piece;

		if ( substr_count( $pending, "[[" ) == substr_count( $pending, "]]" ) ) {
			# Brackets are balanced: the argument is complete
			$args[] = $pending;
			$pending = null;
		}
	}
	# An unbalanced final argument has nothing left to merge with
	if ( $pending !== null ) {
		$args[] = $pending;
	}

	return $args;
}
1759
# Callback for {{...}} matches: expands SUBST:, MSG:/MSGNW:, INT:, NS:,
# LOCALURL:/LOCALURLE:, magic variables and templates.
#
# $matches: regex match; [0] is the whole construct, [1] an optional
#           newline before the braces, [2] the part before the first '|',
#           [3] the arguments including their leading '|' (or '')
# Returns the replacement text, or $matches[0] when nothing was found.
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgLang;
	$fname = 'Parser::braceSubstitution';
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |

	$args = $this->getTemplateArgs($matches[3]);
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], '{{{' ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	# The cache is only consulted when nothing above has produced a result;
	# otherwise a cached template would replace text that was deliberately
	# left alone (e.g. a call without SUBST: during the pre-save pass).
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# Infinite loop test
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
		}
		# set $text to cached message.
		$text = $this->mTemplates[$part1];
		$found = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				# Below the inclusion limit: fetch the page text
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[['.$title->getPrefixedText().']]';
				$found = true;
			}

			# Template cache array insertion; $text only exists if
			# something was actually found
			if ( $found ) {
				$this->mTemplates[$part1] = $text;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored under
		# their name, all others are numbered starting from 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				if ( $value === false ) {
					$value = '';
				}
				if ( $name !== false ) {
					$assocArgs[$name] = $value;
				}
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Add a new element to the template recursion path
		$this->mTemplatePath[$part1] = 1;

		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}
	# Empties the template path
	$this->mTemplatePath = array();

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1972
1973 # Triple brace replacement -- used for template arguments
# Callback for {{{name}}} matches.
# $matches: regex match; [0] is the whole construct, [1] an optional
#           newline before the braces, [2] the argument name
# Returns the parsed value of the argument when the arguments on top of
# mArgStack contain the (trimmed) name, otherwise the original text.
function argSubstitution( $matches ) {
	$newline = $matches[1];
	$arg = trim( $matches[2] );
	$inputArgs = end( $this->mArgStack );

	if ( !array_key_exists( $arg, $inputArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $inputArgs[$arg], $newline, array() );
}
1986
1987 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity $dbk.
# Returns true while the count, including this inclusion, does not
# exceed MAX_INCLUDE_REPEAT, and false afterwards.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;
	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1998
1999
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on '<'. Each fragment that begins with a whitelisted tag
# name is re-emitted with its attributes passed through fixTagAttributes();
# every other '<' is escaped to &lt; and stray '>' characters to &gt;.
# When $wgUserHtml is off the whitelist is empty, so all markup is escaped.
# Without $wgUseTidy the function additionally tracks open tags: mismatched
# closing tags, table cells outside a table and disallowed self-nesting are
# escaped, and tags still open at the end of the text are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table cell/row tags are treated like single tags: they need no closer
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments. The replacement used to be '$2', a reference to
	# a capture group that does not exist; it always expanded to the empty
	# string, which is now spelled out.
	$text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '', $text );

	# Fragment layout: optional slash, tag name, attributes, closing
	# bracket (optionally preceded by a slash), trailing text up to the next '<'
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			$regs = array();
			# Only destructure when the fragment really looks like a tag,
			# instead of muting the notices list() raises on a failed match
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
				$t = strtolower( $t );

				$badtag = 0 ;
				if ( in_array( $t, $htmlelements ) ) {
					# Check our stack
					if ( $slash ) {
						# Closing a tag...
						if ( ! in_array( $t, $htmlsingle ) &&
						  ( $ot = @array_pop( $tagstack ) ) != $t ) {
							# Closer does not match the innermost open tag:
							# restore the stack and escape the closer
							@array_push( $tagstack, $ot );
							$badtag = 1;
						} else {
							if ( $t == 'table' ) {
								# Leaving a table: back to the enclosing stack
								$tagstack = array_pop( $tablestack );
							}
							$newparams = '';
						}
					} else {
						# Keep track for later
						if ( in_array( $t, $tabletags ) &&
						  ! in_array( 'table', $tagstack ) ) {
							$badtag = 1;
						} else if ( in_array( $t, $tagstack ) &&
						  ! in_array ( $t , $htmlnest ) ) {
							$badtag = 1 ;
						} else if ( ! in_array( $t, $htmlsingle ) ) {
							if ( $t == 'table' ) {
								# Each table gets a fresh stack of its own
								array_push( $tablestack, $tagstack );
								$tagstack = array();
							}
							array_push( $tagstack, $t );
						}
						# Strip non-approved attributes from the tag
						$newparams = $this->fixTagAttributes($params);
					}
					if ( ! $badtag ) {
						$rest = str_replace( '>', '&gt;', $rest );
						$text .= "<$slash$t $newparams$brace$rest";
						continue;
					}
				}
			}
			# Not a permitted tag (or not a tag at all): escape it
			$text .= '&lt;' . str_replace( '>', '&gt;', $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			$regs = array();
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
				$t = strtolower( $t );
				if ( in_array( $t, $htmlelements ) ) {
					$newparams = $this->fixTagAttributes($params);
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x);
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2115
2116
# This function accomplishes several tasks:
# 1) Auto-number headings if that option is enabled
# 2) Add an [edit] link to sections for logged in users who have enabled the option
# 3) Add a Table of contents on the top for users who have enabled the option
# 4) Auto-anchor headings
#
# It loops through all headlines, collects the necessary data, then splits up the
# string and re-inserts the newly formatted headlines.
#
# $text   - HTML in which the <h1>..<h6> elements are looked up
# $isMain - when false, the TOC is not inserted in front of the first
#           heading (a __TOC__ placement is still honoured)
# Returns the HTML with rebuilt headings and, if applicable, the TOC.
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, $matches[2] = rest of the opening tag
	# including '>', $matches[3] = headline contents
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$refers = array();    # anchor text => number of times seen so far
	$refcount = array();  # headline index => occurrence number of its anchor
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted number from every level that has a count
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second call has to work on the result of the first one; it used
		# to be fed $headline again, which threw the unstrip() result away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Turn the URL encoding into anchor-friendly text: keep ':' readable
		# and use '.' in place of '%'
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		# Each headline index is visited exactly once, so start it empty
		$head[$headlineCount] = '';
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		# Close whatever indentation is still open and wrap the lines
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text (section 0) used to be
		# emitted here when section editing is enabled.
		# Disabled because it broke block formatting
		# For example, a bullet point in the top line
		# $full .= $sk->editSectionLink(0);

		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full .= $toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# Put the TOC where the __TOC__ magic word stands
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2327
# Return an HTML link for the "ISBN 123456" text.
#
# Every "ISBN " marker followed by at least one character out of
# 0-9, '-' and A-Z is replaced by a link to Special:Booksources; the dashes
# are dropped from the number passed in the query string. Markers without
# such characters are left as they were.
# Returns the text with the links inserted.
/* private */ function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# 'ISBN ' holds no regex metacharacters, so explode() splits exactly
	# like the deprecated POSIX split() did
	$a = explode( 'ISBN ', " $text" );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Drop the space that was prepended above
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# strspn() measures the leading run without reading past the end
		# of the string, unlike the former character-by-character loops
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( '-', '', $isbn );

		if ( '' == $num ) {
			# Nothing usable after the marker: restore the original text
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
			  $titleObj->escapeLocalUrl( "isbn={$num}" ) .
			  "\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2368
# Return an HTML link for the "GEO ..." text.
#
# Coordinates written as D&deg;M'S" North|South, D&deg;M'S" East|West are
# first rewritten to "(GEO <lat>:<lon>)". Then every "GEO " marker followed
# by characters out of 0-9 . + - : that contain at least one ':' is replaced
# by a link to Special:Geo; '+' signs are dropped from the value passed in
# the query string. Other markers are left as they were.
# Returns the text with the links inserted.
/* private */ function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This conversion is only for the ~35000 U.S. Census Rambot pages...
	# North/East are written with '+', South/West with '-'. The South rows
	# used to emit '+' for the latitude, which looks like a copy-paste slip
	# from the North rows.
	# NOTE(review): assumes Special:Geo reads a leading '-' as south/west,
	# as it already had to for western longitudes - confirm.
	$coord = "(\\d+)&deg;(\\d+)'(\\d+)\"";
	$hemispheres = array(
		array( 'North', 'West', '+', '-' ),
		array( 'North', 'East', '+', '+' ),
		array( 'South', 'West', '-', '-' ),
		array( 'South', 'East', '-', '+' )
	);
	foreach ( $hemispheres as $h ) {
		list( $ns, $ew, $latSign, $lonSign ) = $h;
		$text = preg_replace(
			"/$coord $ns, $coord $ew/",
			"(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
			$text );
	}

	# 'GEO ' holds no regex metacharacters, so explode() splits exactly
	# like the deprecated POSIX split() did
	$a = explode( 'GEO ', " $text" );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Drop the space that was prepended above
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# strspn() measures the leading run without reading past the end
		# of the string, unlike the former character-by-character loops
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( '+', '', $geo );

		# A usable value needs at least two ':'-separated parts
		if ( '' == $num || count ( explode ( ":" , $num , 3 ) ) < 2 ) {
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
			  $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
			  "\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2416
# Return an HTML link for the "RFC 1234" text.
#
# Every "RFC " marker followed by at least one digit is replaced by an
# external link whose URL is the 'rfcurl' message with $1 replaced by the
# number. Markers without digits are left as they were.
# Returns the text with the links inserted.
/* private */ function magicRFC( $text ) {
	# 'RFC ' holds no regex metacharacters, so explode() splits exactly
	# like the deprecated POSIX split() did
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	# Drop the space that was prepended above
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# strspn() measures the leading run without reading past the end
		# of the string, unlike the former character-by-character loops
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( '' == $rfc ) {
			# No number after the marker: restore the original text
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
2449
# Entry point producing altered wiki markup: sets the parser up for OT_WIKI
# output, normalises line endings, and runs pstPass2() on the text while the
# sections removed by strip() are held back.
#
# $text       - wikitext to transform
# $title      - stored by reference as mTitle
# $user       - handed on to pstPass2()
# $options    - stored as mOptions
# $clearState - when true, clearState() is called before processing
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise Windows line endings
	$text = str_replace( "\r\n", "\n", $text );

	// now with regexes
	/*
	$pairs = array(
		"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
		"/<br *?>/i" => "<br />",
	);
	$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
	*/

	$stripState = false;
	$stripped = $this->strip( $text, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );

	# Put the stripped sections back
	$restored = $this->unstrip( $transformed, $stripState );
	$restored = $this->unstripNoWiki( $restored, $stripState );

	return $restored;
}
2478
# Second pass of the pre-save transform: expands variables, signatures
# (~~~, ~~~~, ~~~~~) and the "pipe trick" context links, then trims
# trailing whitespace.
#
# $text - wikitext to transform
# $user - object supplying getName() and getOption( 'nickname' )
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
	  ' (' . date( 'T' ) . ')';
	# Restore the previous timezone. This used to read $oldtzs, an undefined
	# variable, so TZ was always reset to the empty string.
	if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

	# The tilde runs are literal strings, so plain str_replace() is enough
	# and, unlike preg_replace(), does not interpret '$1' or '\\1' inside
	# the user-chosen nickname as backreferences. Longest run first.
	$userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2546
# Prepares the parser for use by outside code: stores the title (by
# reference), the options and the output type, which parse() would
# normally set up, and optionally clears the parser state.
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2557
# Runs variable replacement over $text in OT_MSG mode, using $wgTitle as
# the title and $options as the parser options. The parser state is
# cleared first. A call made while another one is still running returns
# $text unchanged.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
2577
# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
# Callback will be called with the text within
# Transform and return the text within
#
# Registers $callback for $tag in mTagHooks and returns the callback that
# was registered before, or null when there was none.
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2586 }
2587
# Holds the result of a parse: the generated text plus the language links,
# category links and "contains old magic" flag collected along the way.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. All arguments are optional; the cache time starts out
	# as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Plain accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Setters: store the new value through wfSetVar() and return its result
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Folds the links and the old-magic flag of $other into this object.
	# The text and the cache time are left alone.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Append the other object's category links. This used to append
		# $this->mLanguageLinks, which dropped the other categories and
		# filed the language links as categories.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2621
# Bundle of settings that control a parse. Each option has a getter that
# returns the stored value and a setter that stores the new value through
# wfSetVar() and returns its result.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# <math> expansion
	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	# Category links
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	# Dynamic dates
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	# Interlanguage links
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	# External images
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	# Skin; the setter keeps a reference to the object it is given
	function getSkin() { return $this->mSkin; }
	function setSkin( &$x ) { $this->mSkin =& $x; }

	# Date format
	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	# Section edit links
	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	# Section editing on right click
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	# Heading numbers
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	# Table of contents
	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Get parser options
	# Builds a new ParserOptions object initialised from $user.
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	# Copies the site-wide settings from the globals and the per-user
	# settings (skin, date format, section editing, heading numbers, TOC)
	# from $userInput. A false-ish $userInput is replaced by a new User
	# object marked as loaded.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$fname = "ParserOptions::initialiseFromUser";
		wfProfileIn( $fname );

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Fetching the skin is profiled separately
		wfProfileIn( "$fname-skin" );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( "$fname-skin" );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );

		wfProfileOut( $fname );
	}


}
2700
# Regex callbacks, used in Parser::replaceVariables
# Forwards the match to braceSubstitution() of the parser in $wgCurParser.
function wfBraceSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2706
# Regex callback: forwards the match to argSubstitution() of the parser
# in $wgCurParser.
function wfArgSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2711
# Regex callback: forwards the match to variableSubstitution() of the
# parser in $wgCurParser.
function wfVariableSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2716
# Return the total number of articles
# Calls wfLoadSiteStats() first, then returns $wgNumberOfArticles.
function wfNumberOfArticles() {
	wfLoadSiteStats();

	return $GLOBALS['wgNumberOfArticles'];
}
2724
# Get various statistics from the database
# Fills $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from row 1 of
# the site_stats table. Nothing is done when $wgNumberOfArticles is already
# different from -1, and the globals are left alone when the query
# returns false.
/* private */ function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
	$fname = 'wfLoadSiteStats';

	# Already loaded
	if ( -1 != $wgNumberOfArticles ) {
		return;
	}

	$columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conditions = array( 'ss_row_id' => 1 );

	$dbr =& wfGetDB( DB_SLAVE );
	$row = $dbr->getArray( 'site_stats', $columns, $conditions, $fname );

	if ( $row !== false ) {
		$wgTotalViews = $row->ss_total_views;
		$wgTotalEdits = $row->ss_total_edits;
		$wgNumberOfArticles = $row->ss_good_articles;
	}
}
2745
# Escapes double quotes and angle brackets as HTML entities; every other
# character, including '&', is left as it is.
function wfEscapeHTMLTagsOnly( $in ) {
	$search = array( '"', '>', '<' );
	$entities = array( '&quot;', '&gt;', '&lt;' );

	return str_replace( $search, $entities, $in );
}
2752
2753
2754 ?>