aeed58cc24045c85d08b4242c8d467220a730687
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Processes wiki markup
8 #
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
11 #
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
14 #
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
16 #
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
20 #
21 # * only within ParserOptions
22 #
23 #
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
31 #
32
define( 'MAX_INCLUDE_REPEAT', 20 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# String parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix for escaping, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29');


# Constants needed for external link processing

define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
# Including space
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Capture groups: 1 = whole URL, 2 = protocol, 3 = link text
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# Capture groups: 1 = protocol with trailing colon, 2 = host and path,
# 3 = file name, 4 = extension.
# The protocol alternation is wrapped in (?:...) so that the colon applies
# to every protocol; without the grouping the pattern reads "http" OR
# "https:" and group 1 never holds the full protocol.
define( 'EXT_IMAGE_REGEX',
	'/^((?:'.HTTP_PROTOCOLS.'):)'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
68
69 class Parser
70 {
# Persistent: not touched by clearState()
var $mTagHooks;

# Cleared with clearState():
var $mOutput;
var $mAutonumber;
var $mDTopen;
var $mStripState = array();
var $mVariables;
var $mIncludeCount;
var $mArgStack;
var $mLastSection;
var $mInPre;

# Temporary:
var $mOptions;
var $mTitle;
var $mOutputType;
# Cache of already loaded templates, avoids multiple SQL queries
# for the same string
var $mTemplates;
# Unsorted hash of all the templates already loaded in this path.
# Used for loop detection.
var $mTemplatePath;
84
# PHP 4-style constructor: starts with no tag hooks, an empty template
# cache and an empty template path, then resets the per-parse state.
function Parser() {
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	$this->clearState();
}
91
# Reset every per-parse member to its initial value and start a fresh
# ParserOutput. mTagHooks, mTemplates and mTemplatePath are left alone.
function clearState() {
	# Collections
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Flags and counters
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;

	$this->mOutput = new ParserOutput();
}
103
# Convert wiki markup to HTML.
#
# First pass--just handle <nowiki> sections (via strip()), pass the rest
# off to internalParse() which does all the real work, then clean up
# special characters, run doBlockLevels() and finally put the <nowiki>
# content back.
#
# $text       wiki markup to convert
# $title      title object, stored by reference in $this->mTitle
# $options    parser options, stored in $this->mOptions
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput with its text set to the generated HTML
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The hex class is A-Fa-f: the range A-f would also accept
			# G-Z and some punctuation as "hex digits", leaving things
			# like &#xZZ; unescaped.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
165
# Returns two mt_rand() values in the range 0..0x7fffffff, each written
# in hexadecimal and concatenated.
/* static */ function getRandomString() {
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
169
# Replaces all occurrences of <$tag>content</$tag> in the text
# with a random marker and returns the new text. The output parameter
# $content will be an associative array filled with data of the form
# $unique_marker => content.
#
# If $content is already set, the additional entries will be appended.
#
# If $tag is set to STRIP_COMMENTS, the function will extract
# <!-- HTML comments --> instead.
#
# An opening tag without a matching closing tag swallows the rest of
# the text as its content.
#
# $tag         tag name, inserted unquoted into the regular expressions
# $text        text to scan
# $content     by-reference array receiving marker => content pairs
# $uniq_prefix string prepended to every generated marker

/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	while ( '' != $text ) {
		# Split off everything before the next opening tag
		if($tag==STRIP_COMMENTS) {
			$p = preg_split( '/<!--/i', $text, 2 );
		} else {
			$p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
		}
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No opening tag left, or nothing follows it
			$text = '';
		} else {
			# Split the remainder at the closing tag
			if($tag==STRIP_COMMENTS) {
				$q = preg_split( '/-->/i', $p[1], 2 );
			} else {
				$q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
			}
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# When the closing tag is missing preg_split() returns a single
			# piece; stop scanning instead of reading an undefined offset.
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
211
# Strips and renders <html> (only if $wgRawHtml), <nowiki>, <math>,
# <pre>, optionally HTML comments, and every tag in $this->mTagHooks.
#
# When the output type is OT_HTML the extracted content is rendered
# (escaped, passed to renderMath() or to the hook callback); otherwise
# it is wrapped in its original tags again so that unstrip() restores
# the source markup.
#
# Returns the text with markers in place of the stripped content, and
# fills $state with the data needed in unstrip().
# If $state is already a valid strip state, the new content is added
# to it; categories missing from $state are created.
#
# When $stripcomments is set, HTML comments <!-- like this -->
# will be stripped in addition to other tags. This is important
# for section editing, where these comments cause confusion when
# counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml;
	if( $wgRawHtml ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		foreach( $html_content as $marker => $content ) {
			if ($render ) {
				# Raw and unchecked for validity.
				$html_content[$marker] = $content;
			} else {
				$html_content[$marker] = "<html>$content</html>";
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source with escaped tags. The
				# closing tag needs its slash.
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$extracted = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one.
	# A category that the existing state lacks (for instance 'comment'
	# in a state seeded by insertStripItem(), or a newly hooked tag) is
	# added rather than dropped, so every marker placed in $text above
	# can be resolved by unstrip().
	if ( $state ) {
		foreach ( $extracted as $key => $items ) {
			if ( isset( $state[$key] ) && is_array( $state[$key] ) ) {
				$state[$key] = $state[$key] + $items;
			} else {
				$state[$key] = $items;
			}
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
327
# Puts stripped content back in place of its markers, for every
# category in $state except 'nowiki' and 'html'.
# Always call unstripNoWiki() after this one.
function unstrip( $text, &$state ) {
	# Must expand in reverse order, otherwise nested tags will be corrupted
	$dict = end( $state );
	while ( $dict !== false ) {
		$category = key( $state );
		if ( !( $category == 'nowiki' || $category == 'html' ) ) {
			# Walk this category's markers from last to first
			$replacement = end( $dict );
			while ( $replacement !== false ) {
				$text = str_replace( key( $dict ), $replacement, $text );
				$replacement = prev( $dict );
			}
		}
		$dict = prev( $state );
	}

	return $text;
}
# Puts the 'nowiki' content back in place of its markers and, when
# $wgRawHtml is set, the 'html' content as well.
# Always call this after unstrip() to preserve the order.
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	# Must expand in reverse order, otherwise nested tags will be corrupted
	$replacement = end( $state['nowiki'] );
	while ( $replacement !== false ) {
		$text = str_replace( key( $state['nowiki'] ), $replacement, $text );
		$replacement = prev( $state['nowiki'] );
	}

	if ( $wgRawHtml ) {
		$replacement = end( $state['html'] );
		while ( $replacement !== false ) {
			$text = str_replace( key( $state['html'] ), $replacement, $text );
			$replacement = prev( $state['html'] );
		}
	}

	return $text;
}
358
# Add an item to the strip state.
# Returns the unique tag which must be inserted into the stripped text.
# The tag will be replaced with the original text in unstrip().
#
# If $state is empty it is seeded with the same fixed categories that
# strip() produces, including 'comment', which strip() merges into an
# existing state.

function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
376
# categoryMagic
# Generate a list of subcategories and pages for a category.
# Depending on wfMsg("usenewcategorypage") it either calls the new
# or the old code: the old code runs when the message starts with '0'.
# The new code will not work properly for some languages due to
# sorting issues, so they might want to turn it off.
function categoryMagic() {
	$msg = wfMsg('usenewcategorypage');
	$useOldCode = ( '0' == @$msg[0] );
	if ( !$useOldCode ) {
		return $this->newCategoryMagic();
	}
	return $this->oldCategoryMagic();
}
392
# Generates the list of subcategories and pages for a category as two
# comma-separated lists of links, each under its own <h2> heading.
# Returns nothing when category magic is disabled and '' when the
# current title is not in the category namespace.
function oldCategoryMagic () {
	global $wgLang ;
	$fname = 'Parser::oldCategoryMagic';

	# Doesn't use categories at all
	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ;
	}

	# This ain't a category page
	if ( $this->mTitle->getNamespace() != NS_CATEGORY ) {
		return "" ;
	}

	$html = "<br style=\"clear:both;\"/>\n";

	$skin =& $this->mOptions->getSkin() ;

	$pageLinks = array() ;
	$subcatLinks = array() ;
	$rows = array () ;
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
		"WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query( $sql, $fname ) ;

	# Collect all rows first, then build the links
	while ( $row = $dbr->fetchObject ( $res ) ) {
		$rows[] = $row ;
	}

	# For all pages that link to this category
	foreach ( $rows as $row ) {
		$name = $wgLang->getNsText ( $row->cur_namespace ) ;
		if ( $name != '' ) {
			$name .= ':' ;
		}
		$name .= $row->cur_title ;

		$link = $skin->makeLink ( $name ) ;
		if ( $row->cur_namespace == NS_CATEGORY ) {
			$subcatLinks[] = $link ; # Subcategory
		} else {
			$pageLinks[] = $link ; # Page in this category
		}
	}
	$dbr->freeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $subcatLinks ) > 0 ) {
		$html .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$html .= implode ( ', ' , $subcatLinks ) ;
	}

	# Showing pages in this category
	if ( count ( $pageLinks ) > 0 ) {
		$heading = wfMsg( 'category_header', $this->mTitle->getText() );
		$html .= "<h2>{$heading}</h2>\n" ;
		$html .= implode ( ', ' , $pageLinks ) ;
	}

	return $html ;
}
454
# Generates the list of subcategories and pages for a category, grouped
# under <h3> headings by the first character of the sort key. Lists with
# more than six entries are laid out as a three-column table.
# Returns nothing when category magic is disabled and '' when the
# current title is not in the category namespace.
function newCategoryMagic () {
	global $wgLang;
	$fname = 'Parser::newCategoryMagic';

	if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all

	if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();
	# NOTE(review): the result is unused here; the call is kept in case
	# getArticleID() has side effects -- confirm and drop.
	$this->mTitle->getArticleID() ;

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
		"$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query ( $sql, $fname ) ;
	while ( $x = $dbr->fetchObject ( $res ) )
	{
		$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == NS_CATEGORY ) {
			$ctitle = str_replace( '_',' ',$x->cur_title );
			array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

			// If there's a link from Category:A to Category:B, the sortkey of the resulting
			// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
			// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
			// else use sortkey...
			if ( ($ns.':'.$ctitle) == $x->cl_sortkey ) {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
			} else {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		} else {
			array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	$dbr->freeResult ( $res ) ;

	$ti = $this->mTitle->getText() ;

	# Don't show subcategories section if there are none.
	$numchild = count( $children );
	if ( $numchild > 0 )
	{
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
		if ( $numchild == 1 ) {
			$r .= wfMsg( 'subcategorycount1', 1 );
		} else {
			$r .= wfMsg( 'subcategorycount' , $numchild );
		}
		$r .= $this->formatCategoryMagicList( $children, $children_start_char );
	}

	# The articles heading and count are shown even when there are none
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";

	$numart = count( $articles );
	if ( $numart == 1 ) {
		$r .= wfMsg( 'categoryarticlecount1', 1 );
	} else {
		$r .= wfMsg( 'categoryarticlecount' , $numart );
	}
	$r .= $this->formatCategoryMagicList( $articles, $articles_start_char );

	return $r ;
}

# Renders one category listing for newCategoryMagic().
# $items      list of link HTML strings, indexed from 0
# $start_char list of heading characters, parallel to $items
# Returns '' for an empty list, a three-column table for more than six
# items and a plain sequence of <h3>/<ul> groups otherwise.
/* private */ function formatCategoryMagicList( $items, $start_char ) {
	$total = count( $items );
	if ( $total == 0 ) {
		return '';
	}

	$r = '';
	if ( $total > 6 ) {
		// divide list into three equal chunks
		$chunk = (int) ( $total / 3 );

		// get and display header
		$r .= '<table width="100%"><tr valign="top">';

		// loop through the chunks
		for ( $startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
			$chunkIndex < 3;
			$chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1 )
		{
			$r .= '<td><ul>';
			for ( $index = $startChunk;
				$index < $endChunk && $index < $total;
				$index++ )
			{
				// check for beginning of chunk or change of starting letter;
				// the chunk test comes first so that index 0 never looks
				// at the non-existent entry -1
				if ( ( $index == $startChunk )
					|| ( $start_char[$index] != $start_char[$index - 1] ) )
				{
					$r .= "</ul><h3>{$start_char[$index]}</h3>\n<ul>";
				}

				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul></td>';
		}
		$r .= '</tr></table>';
	} else {
		// for short lists
		$r .= "<h3>{$start_char[0]}</h3>\n";
		$r .= '<ul><li>' . $items[0] . '</li>';
		for ( $index = 1; $index < $total; $index++ )
		{
			if ( $start_char[$index] != $start_char[$index - 1] )
			{
				$r .= "</ul><h3>{$start_char[$index]}</h3>\n<ul>";
			}

			$r .= "<li>{$items[$index]}</li>";
		}
		$r .= '</ul>';
	}
	return $r;
}
647
# Return the list of HTML attribute names that are allowed in wiki
# markup--no scripting, etc.
function getHTMLattrs () {
	return array(
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor',
		'clear',   /* BR */
		'noshade', /* HR */
		'cite',    /* BLOCKQUOTE, Q */
		'size', 'face', 'color', /* FONT */
		/* For various lists, mostly deprecated but safe */
		'type', 'start', 'value', 'compact',
		/* Tables */
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		/* For CSS */
		'id', 'class', 'name', 'style'
	);
}
664
# Remove non approved attributes and javascript in css.
#
# $t is the attribute part of a tag. Every name[=value] pair whose
# lowercased name is not listed by getHTMLattrs() is removed. If what
# remains contains a style attribute with "expression", "tp://"-like
# text or "url(" in it, all attributes are dropped.
# Returns the cleaned, trimmed attribute string.
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier: with /e the matched
	# attribute text was evaluated inside a double-quoted PHP string, so
	# markup such as {${...}} in an attribute value could run code, and
	# single quotes came out with a backslash in front of them.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t);

	$t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

# preg_replace_callback() handler for fixTagAttributes().
# $matches[1] is the attribute name; $matches[3], when present, is the
# value including any surrounding double quotes.
# Returns name, or name=value when a value was given, if the name is
# allowed; otherwise ''.
/* private */ function fixTagAttributesCallback( $matches ) {
	if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	$value = isset( $matches[3] ) ? $matches[3] : '';
	if ( $value !== '' ) {
		return $matches[1] . '=' . $value;
	}
	return $matches[1];
}
690
# Interface with html tidy, used if $wgUseTidy = true.
#
# Wraps $text in a minimal XHTML document, pipes it through the program
# named by $wgTidyBin (with $wgTidyConf and $wgTidyOpts) and returns
# what the program writes to its standard output.
# If nothing comes back for a non-empty $text, the original text is
# returned with an HTML comment noting the failure appended.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Build the option string locally. Appending to the global
	# $wgTidyOpts would add another encoding flag on every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),                # stdin: we write the document
		1 => array('pipe', 'w'),                # stdout: we read the result
		2 => array('file', '/dev/null', 'a')    # stderr: discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
738
# Parse the wiki syntax used to render tables.
#
# Works line by line on $t:
#   {|  (optionally preceded by colons) opens a table
#   |}  closes the innermost table
#   |-  starts a new row; the rest of the line is the row's attributes
#   |+  caption, | data cell, ! header cell; "||" (or "!!" on header
#       lines) separates several cells on one line
# The four stacks below hold one entry per open table.
# Tables still open at the end of the text are closed.
# Returns the text with the table markup replaced by HTML.
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) )
		{
			# Table start; each leading colon becomes one <dl><dd> level
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( "<dl><dd>", $indent_level ) .
				"<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) )
		{
			# Table end: close any open cell and row first
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is written when the first cell arrives;
			# only its attributes are remembered here
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( '|+' == substr ( $x , 0 , 2 ) )
			{
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open the row if it is not open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, then open the new one
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table.
	# The cell is closed with the tag that opened it (td, th or caption).
	while ( count ( $td ) > 0 )
	{
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
841
# Strips and parses the text (as a non-main parse) and adds the result
# to the strip state.
# Returns $newline followed by the strip tag for the parsed text.
function stripParse( $text, $newline, $args ) {
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
849
# Runs the inline transformation passes over already stripped text, in a
# fixed order, and returns the result.
# $args is handed to replaceVariables(); $isMain is handed to
# formatHeadings() and also gates the one-time categoryMagic() output.
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# Tags and variables
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# Quotes and links; replaceInternalLinks() is run twice
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );

	$text = $this->unstrip( $text, $this->mStripState );
	$text = $this->unstripNoWiki( $text, $this->mStripState );

	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	# Category listing is appended once, on the first main parse only
	if ( $isMain && !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
886
# Applies magicISBN(), then magicGEO() when $wgUseGeoMode is set and
# true, then magicRFC() to $text. $text is modified in place and also
# returned by reference.
/* private */ function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );

	$geoEnabled = isset( $wgUseGeoMode ) && $wgUseGeoMode;
	if ( $geoEnabled ) {
		$text = $this->magicGEO( $text );
	}

	$text = $this->magicRFC( $text );
	return $text;
}
896
# Parse ^^ tokens and return html.
# Each ^^text^^ span becomes <small><sup>text</sup></small>. The match
# is non-greedy so that several spans on one line are converted one by
# one instead of being merged into a single superscript.
/* private */ function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
905
# Parse headers and return html.
# A line that starts with N equals signs, followed by some text and N
# more equals signs and then whitespace or the end of the line, becomes
# <hN>text</hN>. Levels are tried from 6 down to 1.
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	foreach ( range( 6, 1 ) as $level ) {
		$marks = str_repeat( '=', $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
918
# Runs doQuotes() on every line of $text and joins the results with
# newlines again.
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$pieces = explode( "\n", $text );
	$outtext = '';
	for ( $i = 0, $total = count( $pieces ); $i < $total; $i++ ) {
		$outtext .= $this->doQuotes ( $pieces[$i] ) . "\n";
	}
	# Drop the newline added after the last line
	$outtext = substr($outtext, 0,-1);
	wfProfileOut( $fname );
	return $outtext;
}
931
# Converts the apostrophe markup ('' = italics, ''' = bold, ''''' = both)
# found in a single line into <em>/<strong> HTML.
#
# $text is one line of wiki text; the converted line is returned. Tags
# still open at the end of the line are closed automatically.
/* private */ function doQuotes( $text ) {
	# Odd indexes hold runs of two or more apostrophes, even indexes hold
	# the plain text between them.
	$tokens = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$tokenCount = count( $tokens );
	if ( $tokenCount == 1 ) {
		# No markup at all
		return $text;
	}

	# Pass 1: normalise every run to a length of 2, 3 or 5, pushing the
	# surplus apostrophes into the preceding text, and count the marks.
	$boldCount = 0;
	$italicCount = 0;
	for ( $k = 1; $k < $tokenCount; $k += 2 ) {
		$runLength = strlen( $tokens[$k] );
		if ( $runLength == 4 ) {
			# Four apostrophes: one literal apostrophe followed by bold
			$tokens[$k-1] .= "'";
			$tokens[$k] = "'''";
			$runLength = 3;
		} elseif ( $runLength > 5 ) {
			# More than five: everything but the last five is literal text
			$tokens[$k-1] .= str_repeat( "'", $runLength - 5 );
			$tokens[$k] = "'''''";
			$runLength = 5;
		}

		switch ( $runLength ) {
			case 2:
				$italicCount++;
				break;
			case 3:
				$boldCount++;
				break;
			case 5:
				$italicCount++;
				$boldCount++;
				break;
		}
	}

	# Pass 2: with an odd number of both bold and italic marks, one bold
	# mark was probably meant as "apostrophe + italics". Prefer the first
	# one preceded by a single-letter word, then one preceded by a
	# multi-letter word, then one preceded by a space.
	if ( ( $boldCount % 2 == 1 ) && ( $italicCount % 2 == 1 ) ) {
		$afterSingleLetter = -1;
		$afterMultiLetter = -1;
		$afterSpace = -1;
		for ( $k = 1; $k < $tokenCount; $k += 2 ) {
			if ( strlen( $tokens[$k] ) != 3 ) {
				continue;
			}
			$lastChar = substr( $tokens[$k-1], -1 );
			$secondLastChar = substr( $tokens[$k-1], -2, 1 );
			if ( $lastChar == " " ) {
				if ( $afterSpace == -1 ) { $afterSpace = $k; }
			} elseif ( $secondLastChar == " " ) {
				if ( $afterSingleLetter == -1 ) { $afterSingleLetter = $k; }
			} else {
				if ( $afterMultiLetter == -1 ) { $afterMultiLetter = $k; }
			}
		}

		# All three can stay at -1, e.g. when the line only contains a
		# single five-apostrophe run; nothing is demoted in that case.
		if ( $afterSingleLetter > -1 ) {
			$demote = $afterSingleLetter;
		} elseif ( $afterMultiLetter > -1 ) {
			$demote = $afterMultiLetter;
		} else {
			$demote = $afterSpace;
		}
		if ( $demote > -1 ) {
			$tokens[$demote] = "''";
			$tokens[$demote-1] .= "'";
		}
	}

	# Pass 3: emit HTML. State names list the open tags in opening order
	# ('strongem' = <strong><em>). In state 'both' a five-apostrophe run
	# has been seen and the text is buffered until the next mark decides
	# which tag has to be the outer one.
	# $moves[run length][state] = array( html to emit, next state )
	$moves = array(
		2 => array(
			'em'       => array( "</em>", '' ),
			'strongem' => array( "</em>", 'strong' ),
			'emstrong' => array( "</strong></em><strong>", 'strong' ),
			'strong'   => array( "<em>", 'strongem' ),
			''         => array( "<em>", 'em' ),
		),
		3 => array(
			'strong'   => array( "</strong>", '' ),
			'strongem' => array( "</em></strong><em>", 'em' ),
			'emstrong' => array( "</strong>", 'em' ),
			'em'       => array( "<strong>", 'emstrong' ),
			''         => array( "<strong>", 'strong' ),
		),
		5 => array(
			'strong'   => array( "</strong><em>", 'em' ),
			'em'       => array( "</em><strong>", 'strong' ),
			'strongem' => array( "</em></strong>", '' ),
			'emstrong' => array( "</strong></em>", '' ),
		),
	);
	# Leaving 'both': array( html before buffer, html after buffer, next state )
	$bothExits = array(
		2 => array( "<strong><em>", "</em>", 'strong' ),
		3 => array( "<em><strong>", "</strong>", 'em' ),
		5 => array( "<em><strong>", "</strong></em>", '' ),
	);

	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $tokens as $k => $piece ) {
		if ( ( $k % 2 ) == 0 ) {
			# Plain text
			if ( $state == 'both' ) {
				$buffer .= $piece;
			} else {
				$output .= $piece;
			}
			continue;
		}

		$runLength = strlen( $piece );
		if ( $state == 'both' ) {
			if ( isset( $bothExits[$runLength] ) ) {
				list( $before, $after, $nextState ) = $bothExits[$runLength];
				$output .= $before . $buffer . $after;
				$state = $nextState;
			}
		} elseif ( $runLength == 5 && $state == '' ) {
			$buffer = '';
			$state = 'both';
		} elseif ( isset( $moves[$runLength][$state] ) ) {
			list( $html, $nextState ) = $moves[$runLength][$state];
			$output .= $html;
			$state = $nextState;
		}
	}

	# Close whatever is still open; the innermost tag has to go first.
	$closers = array(
		'strong'   => '</strong>',
		'emstrong' => '</strong></em>',
		'em'       => '</em>',
		'strongem' => '</em></strong>',
	);
	if ( $state == 'both' ) {
		$output .= "<strong><em>{$buffer}</em></strong>";
	} elseif ( isset( $closers[$state] ) ) {
		$output .= $closers[$state];
	}
	return $output;
}
1092
1093 # Note: we have to do external links before the internal ones,
1094 # and otherwise take great care in the order of things here, so
1095 # that we don't end up interpreting some URLs twice.
1096
# Replaces bracketed external links ([http://example.org label]) with HTML
# anchors. The text between the bracketed links is passed through
# replaceFreeExternalLinks() so that bare URLs are linked as well.
#
# $text is wiki text; the return value is the text with links converted.
# Each match of EXT_LINK_BRACKETED is expected to contribute four pieces:
# the URL, its protocol, the link text and the trailing text.
/* private */ function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsg('linktrail');
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	# Everything before the first bracketed link
	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		# HTML-safe form of the URL, used for both the href attribute and
		# the printable expansion. '&amp;' is accepted as the obsolete way
		# of writing '&' (as replaceFreeExternalLinks() does), so it is
		# normalised first and the URL is then escaped exactly once.
		$encUrl = htmlspecialchars( str_replace( '&amp;', '&', $url ) );

		# Bit in parentheses showing the URL for the printable version.
		# It is left out when the link text already is the URL. The pattern
		# uses '!' as delimiter, so that is what preg_quote() must escape.
		$sameAsText = '!' . preg_quote( $protocol, '!' ) . '://' . preg_quote( $text, '!' ) . '/?$!';
		if( $url == $text || preg_match( $sameAsText, $url ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $text );

		# The URL is taken from the wiki text as typed, so users can paste
		# URLs directly. It must still be escaped before it is placed inside
		# the double-quoted href attribute, otherwise a quote character in
		# the URL would end the attribute early.
		$s .= "<a href=\"{$encUrl}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
1170
1171 # Replace anything that looks like a URL with a link
# Links bare URLs. $text is split at every "<protocol>:" listed in
# URL_PROTOCOLS; whenever URL characters follow the protocol, the URL is
# turned into an image (if external images are allowed and it looks like
# one) or into an external link. Returns the converted text.
function replaceFreeExternalLinks( $text ) {
	$pieces = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$out = array_shift( $pieces );

	$sk =& $this->mOptions->getSkin();

	# What is left comes in pairs: a protocol and the text following it
	$pieceCount = count( $pieces );
	for ( $k = 0; $k < $pieceCount; $k += 2 ) {
		$scheme = $pieces[$k];
		$rest = $pieces[$k + 1];

		if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $rest, $hit ) ) {
			# Nothing URL-like behind the protocol, keep the text as it is
			$out .= $scheme . $rest;
			continue;
		}

		$target = $scheme . $hit[1];
		$after = $hit[2];

		# Trailing punctuation belongs to the sentence, not to the URL.
		# A closing bracket counts as punctuation too unless the URL
		# contains an opening one.
		$punctuation = ',;\.:!?';
		if ( strpos( $target, '(' ) === false ) {
			$punctuation .= ')';
		}
		$surplus = strspn( strrev( $target ), $punctuation );
		if ( $surplus ) {
			$after = substr( $target, -$surplus ) . $after;
			$target = substr( $target, 0, -$surplus );
		}

		# Replace &amp; from obsolete syntax with &
		$target = str_replace( '&amp;', '&', $target );

		# External image if allowed, ordinary link otherwise
		$html = $this->maybeMakeImageLink( $target );
		if ( $html === false ) {
			$html = $sk->makeExternalLink( $target, $target );
		}
		$out .= $html . $after;
	}
	return $out;
}
1217
1218 # make an image if it's allowed
# Returns the skin's image HTML for $url when external images are enabled
# in the parser options and $url matches EXT_IMAGE_REGEX; returns false
# in every other case.
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}
	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
1230
# Replaces internal links ([[Target]], [[Target|text]]trail) in $s with the
# HTML produced by the skin and returns the result.
#
# Besides ordinary links this handles subpage links (/Foo), the forcing
# colon (:Foo), interlanguage links, images, categories, media and special
# pages as well as links pointing to the page being parsed. Interlanguage
# and category links are not rendered in place; they are recorded in
# $this->mOutput instead.
/* private */ function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	$redirect = MagicWord::get ( MAG_REDIRECT ) ;

	# Split at every '[['. The leading space guarantees that the first
	# element is the text in front of the first link, even if it is empty.
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		# Prefix of the very first link, taken from the leading text
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( $fname.'-setup' );

	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			# The word directly in front of the link becomes its prefix
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ':');
		if( $c == '/' ) { # subpage
			if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
				if( '' == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			# Drop the forcing colon
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( '' == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a valid title; leave the markup untouched
			$s .= $prefix . '[[' . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded, not shown in the text
				array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == NS_IMAGE ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == NS_CATEGORY ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$pPLC=$sk->postParseLinkColour();
				$sk->postParseLinkColour( false );
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$sk->postParseLinkColour( $pPLC );
				$wgLinkCache->resume();

				# Without an explicit sort key the page's own title is used
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# Strict comparison is required here: strpos() returns 0 for a
		# '#' at the very start of the link, and 0 == FALSE would wrongly
		# classify such a link as having no fragment.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
			( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == NS_MEDIA ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == NS_SPECIAL ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1390
1391 # Some functions here used by doBlockLevels()
1392 #
# Returns the closing tag (followed by a newline) for the block recorded
# in $this->mLastSection, or '' when no block is open. In both cases the
# section and the mInPre flag are reset.
/* private */ function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mLastSection = '';
	$this->mInPre = false;
	return $closing;
}
1402 # getCommon() returns the length of the longest common substring
1403 # of both arguments, starting at the beginning of both.
1404 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the
# number of leading characters (bytes) that are identical in both strings.
# The result is 0 if either string is empty.
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used because the curly-brace form
	# ($st1{$i}) is not accepted by newer PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
1415 # These next three functions open, continue, and close the list
1416 # element appropriate to the prefix character passed into them.
1417 #
# Opens a list for the prefix character $char: closes the current
# paragraph and appends the opening tags of the list and of its first
# item. ';' additionally marks a definition term as open. For any other
# character only an error comment is returned.
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			# Replaces (not appends to) whatever closeParagraph() returned
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
1432
# Returns the HTML that ends the current item and starts the next one in
# a list of type $char. For definition lists $this->mDTopen tells whether
# a <dt> or a <dd> is being closed; it is updated to reflect the element
# opened here. Unknown characters yield an error comment.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	$close = $this->mDTopen ? '</dt>' : '</dd>';
	if ( ';' == $char ) {
		$this->mDTopen = true;
		$open = '<dt>';
	} else {
		$this->mDTopen = false;
		$open = '<dd>';
	}
	return $close . $open;
}
1448
# Returns the closing tags, followed by a newline, for a list of type
# $char ('*', '#' or ':'). For ':' an open definition term is closed
# instead of a definition. Any other character, ';' included, yields an
# error comment without a newline.
/* private */function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$closing = '</li></ul>';
			break;
		case '#':
			$closing = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$closing = '</dt></dl>';
			} else {
				$closing = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $closing . "\n";
}
1463
# Builds the block-level structure of the page: walks through $text line
# by line, turns lines starting with * # : ; into (nested) lists, lines
# starting with a space into <pre> blocks and everything else into
# paragraphs.
#
# $linestart: if false, the first line of $text is the continuation of a
# line started elsewhere and is copied to the output untouched.
# Returns the resulting HTML.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# false, or the paragraph tags held back while blank lines are seen
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' belong to the same kind of list, so prefixes
			# are compared with ';' normalised to ':'
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ':' );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that are not shared
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open the levels that are new on this line
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() instead of a string offset: $t may be empty
				# here, and reading offset 0 of an empty string is an error
				# condition in PHP.
				if ( ' ' === substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						# Blank line: one ends the paragraph, a second one
						# in a row produces an additional <br />
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# A blank line held back in $paragraphStack is not copied
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close the lists still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1620
1621 # Return value of a magic variable (like PAGENAME)
# Looks up the current value of the magic variable identified by the
# MAG_* constant $index. Returns NULL for an index not handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_PAGENAMEE:
			$value = $this->mTitle->getPartialURL();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one
			$value = $wgLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') counts from 0; 1 is added before the lookup
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1657
1658 # initialise the magic variables (like CURRENTMONTHNAME)
# Rebuilds $this->mVariables from scratch: for every id listed in
# $wgVariableIDs the matching magic word registers the variable's current
# value in the array.
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1667
# Expands {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# $args holds the arguments of the template currently being expanded; it
# is pushed onto $this->mArgStack for the duration of the call because
# the function is re-entered for nested templates.
# Variables and arguments are only substituted for HTML output; the
# template pass runs for every output type.
# Texts longer than MAX_INCLUDE_SIZE are returned unchanged.
/* private */ function replaceVariables( $text, $args = array() ) {
	# Prevent too big inclusions
	if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The callbacks named below are plain functions; presumably they reach
	# this parser through the global set here (they are defined elsewhere).
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );

		# Argument substitution
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
	}
	# Template substitution
	$regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1705
# Callback for {{NAME}}: $matches[1] is the name between the braces.
# Returns the value of the magic variable with that name and flags the
# output as containing old magic; an unknown name leaves the matched text
# ($matches[0]) unchanged.
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1718
1719 # Split template arguments
# Splits the argument part of a template call into a list of arguments.
# $argsString is either '' (no arguments) or starts with the '|' that
# precedes the first argument.
#
# A '|' inside a link belongs to the link, not to the template: as long
# as an argument contains a different number of '[[' and ']]', the
# following piece is appended to it (together with the '|') instead of
# starting a new argument.
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );
	$pieceCount = count( $pieces );

	$args = array();
	$next = 0;
	while ( $next < $pieceCount ) {
		$current = $pieces[$next++];
		# Glue on further pieces until the brackets balance or no piece
		# is left
		while ( $next < $pieceCount
			&& substr_count( $current, "[[" ) != substr_count( $current, "]]" ) ) {
			$current .= "|" . $pieces[$next++];
		}
		$args[] = $current;
	}

	return $args;
}
1743
# Callback for {{...}}: works out what the double-brace construct stands
# for and returns its replacement text.
#
# $matches[1] is an optional newline in front of the braces, $matches[2]
# the part before the first '|' and $matches[3] the arguments (empty, or
# starting with '|').
#
# Tried in this order: leftover {{{...}}}, SUBST:, MSGNW:/MSG:/INT:, NS:,
# LOCALURL:/LOCALURLE:, magic variables, the template cache and finally
# the database. If nothing matches, the original text is returned.
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgLang;
	$fname = 'Parser::braceSubstitution';
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |

	$args = $this->getTemplateArgs($matches[3]);
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], '{{{' ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	# The cache is only consulted while nothing has matched yet; otherwise
	# a cached template of the same name would overwrite a result that has
	# already been decided above (for example a call that has to be left
	# alone during the pre-save transform).
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# Infinite loop test: a template already on the current
		# expansion path is inserted as is, without being parsed again
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
		}
		# set $text to cached message.
		$text = $this->mTemplates[$part1];
		$found = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[[' . $title->getPrefixedText() . ']]';
				$found = true;
			}

			# Template cache array insertion. Nothing is stored when no
			# text was produced ($text would be undefined here).
			if ( $found ) {
				$this->mTemplates[$part1] = $text;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored
		# under their name, all others are numbered from 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always returns a string, so no further checks
				# are needed on $name and $value
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Add a new element to the template recursion path
		$this->mTemplatePath[$part1] = 1;

		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}
	# Empties the template path
	$this->mTemplatePath = array();

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1957
1958 # Triple brace replacement -- used for template arguments
# Callback for {{{name}}}: $matches[1] is an optional newline in front of
# the braces, $matches[2] the argument name. If the arguments on top of
# $this->mArgStack contain that (trimmed) name, its value is run through
# stripParse() and returned; otherwise the matched text is kept.
function argSubstitution( $matches ) {
	$newline = $matches[1];
	$argName = trim( $matches[2] );
	$supplied = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $supplied ) ) {
		return $matches[0];
	}
	return $this->stripParse( $supplied[$argName], $newline, array() );
}
1971
1972 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity identified by $dbk and returns
# true as long as its count has not gone beyond MAX_INCLUDE_REPEAT. The
# count is increased even when the limit has been exceeded.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1983
1984
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on '<'. Every piece that parses as a tag whose name is on
# the whitelist is re-emitted with its attributes passed through
# fixTagAttributes(); every other piece is escaped so it shows up as literal
# text. When $wgUseTidy is off, open tags are additionally tracked on a stack:
# mismatched closing tags and illegal nesting are escaped, and tags still open
# at the end of the text are closed. When $wgUseTidy is on, no nesting checks
# are done here. If $wgUserHtml is off the whitelist is empty, so every tag is
# escaped.
#
# $text - text that may contain HTML tags
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table cells and rows do not need a closing tag either
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function. The call is kept
	# only because getHTMLattrs() is defined outside this view and may have
	# side effects -- confirm and drop if it has none.
	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments. A comment that sits alone on a line is removed
	# together with its leading newline so that no empty line is left behind.
	$text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '', $text );

	# Pieces: optional slash, tag name, attributes, closing bracket, trailing text
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';
	$checkNesting = !$wgUseTidy;

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	$tagstack = array();
	$tablestack = array();

	foreach ( $bits as $x ) {
		$escaped = '&lt;' . str_replace( '>', '&gt;', $x );

		if ( !preg_match( $tagPattern, $x, $regs ) ) {
			# Not a well-formed tag at all: show it literally
			$text .= $escaped;
			continue;
		}
		$slash = $regs[1];
		$t = strtolower( $regs[2] );
		$params = $regs[3];
		$brace = $regs[4];
		$rest = $regs[5];

		if ( !in_array( $t, $htmlelements ) ) {
			# Tag is not on the whitelist
			$text .= $escaped;
			continue;
		}

		$badtag = false;
		$newparams = '';
		if ( !$checkNesting ) {
			# Tidy takes care of the nesting; just clean the attributes
			$newparams = $this->fixTagAttributes( $params );
		} else if ( $slash ) {
			# Closing a tag: unless it is one of the optional-close tags it
			# has to match the innermost open tag
			if ( !in_array( $t, $htmlsingle ) ) {
				$ot = array_pop( $tagstack );
				if ( $ot !== $t ) {
					# Put the open tag back; nothing to put back if the
					# stack was empty
					if ( !is_null( $ot ) ) {
						array_push( $tagstack, $ot );
					}
					$badtag = true;
				} else if ( $t == 'table' ) {
					# Leaving a table: restore the stack that was active
					# before the table was opened
					$tagstack = array_pop( $tablestack );
				}
			}
		} else {
			# Opening a tag: keep track for later
			if ( in_array( $t, $tabletags ) &&
			  !in_array( 'table', $tagstack ) ) {
				# Table-only tag outside of a table
				$badtag = true;
			} else if ( in_array( $t, $tagstack ) &&
			  !in_array( $t, $htmlnest ) ) {
				# Tag is already open and may not be nested in itself
				$badtag = true;
			} else if ( !in_array( $t, $htmlsingle ) ) {
				if ( $t == 'table' ) {
					# A table starts a fresh nesting context
					array_push( $tablestack, $tagstack );
					$tagstack = array();
				}
				array_push( $tagstack, $t );
			}
			# Strip non-approved attributes from the tag
			$newparams = $this->fixTagAttributes( $params );
		}

		if ( $badtag ) {
			$text .= $escaped;
		} else {
			$rest = str_replace( '>', '&gt;', $rest );
			$text .= "<$slash$t $newparams$brace$rest";
		}
	}

	if ( $checkNesting ) {
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
		}
	}

	wfProfileOut( $fname );
	return $text;
}
2100
2101
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title passes userCanEdit() and
 *    the option is enabled (unless the no-edit-section magic word is present)
 * 3) Add a table of contents after the text preceding the first heading, or
 *    in place of the MAG_TOC magic word, when enabled and not suppressed
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text   - HTML in which <h1>..<h6> elements are searched for
 * $isMain - when false, the TOC is not inserted at the default position
 *           (a MAG_TOC placement is still honoured)
 *
 * Returns the HTML with the rewritten headlines and, where applicable, the TOC.
 *
 */

/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1]: level digit, $matches[2]: rest of the opening tag up to and
	# including '>', $matches[3]: headline content
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ( $mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ( $mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();          # rebuilt headline HTML, indexed by headline number
	$sublevelCount = array(); # per heading level: headlines seen so far
	$refers = array();        # anchor text => number of times it has been used
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1] = 0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted number, e.g. "2.1", from the counters of all
			# levels up to the current one, skipping levels that are unused
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Chain on the result of unstrip(); feeding $headline in again would
		# throw the first expansion away
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Keep colons readable and turn the remaining %XX escapes into .XX
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$dupeCount = $refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline = $numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section;
		# repeated headlines get a _2, _3, ... suffix
		$anchor = $canonized_headline;
		if( $dupeCount > 1 ) {
			$anchor .= '_' . $dupeCount;
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$headHtml = '';
		if( $showEditLink ) {
			$headHtml .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
		$head[$headlineCount] = $headHtml;

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text (section 0) used to be
		# added here; it is disabled because it broke block formatting, for
		# example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere ) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if( $forceTocHere ) {
		# Put the TOC where the magic word was written
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2317
# Return an HTML link for the "ISBN 123456" text.
#
# Every "ISBN " in $text that is followed (after optional blanks) by a run of
# digits, dashes or upper-case letters containing at least one non-dash
# character is turned into a link to Special:Booksources with the dashes
# removed from the query. Anything else is left untouched.
#
# $text - text to scan
# Returns the text with the links inserted.
/* private */ function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# explode() replaces the regex-based split(), which no longer exists in
	# current PHP; the separator is a plain string
	$a = explode( 'ISBN ', " $text" );
	if ( count( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Drop the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Leading blanks, then the longest run of valid characters. strspn()
		# copes with an empty or exhausted string, unlike per-character
		# $x{0} scanning.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( '-', '', $isbn );

		if ( '' === $num ) {
			# Not a number after all: put back everything that was consumed,
			# including a run consisting only of dashes
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2358
# Return an HTML link for the "GEO ..." text.
#
# First, coordinates written as  D&deg;M'S" North|South, D&deg;M'S" East|West
# are rewritten to the "(GEO <lat>:<lon>)" notation. Then every "GEO " that is
# followed (after optional blanks) by a run of digits and ".+-:" containing a
# colon is turned into a link to Special:Geo, with the "+" signs removed from
# the query. Anything else is left untouched.
#
# $text - text to scan
# Returns the text with the links inserted.
/* private */ function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This conversion is only for the ~35000 U.S. Census Rambot pages...
	$directions = array( 'N' => 'North', 'S' => 'South', 'E' => 'East', 'W' => 'West' );
	# North and East are positive, South and West negative
	$latitudeSigns = array( 'N' => '+', 'S' => '-' );
	$longitudeSigns = array( 'E' => '+', 'W' => '-' );
	foreach ( $latitudeSigns as $latKey => $latSign ) {
		foreach ( $longitudeSigns as $lonKey => $lonSign ) {
			$latName = $directions[$latKey];
			$lonName = $directions[$lonKey];
			$text = preg_replace(
				"/(\d+)&deg;(\d+)'(\d+)\" {$latName}, (\d+)&deg;(\d+)'(\d+)\" {$lonName}/",
				"(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
				$text );
		}
	}

	# explode() replaces the regex-based split(), which no longer exists in
	# current PHP; the separator is a plain string
	$a = explode( 'GEO ', " $text" );
	if ( count( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	# Drop the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Leading blanks, then the longest run of valid characters. strspn()
		# copes with an empty or exhausted string, unlike per-character
		# $x{0} scanning.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( '+', '', $geo );

		if ( '' === $num || count( explode( ':', $num, 3 ) ) < 2 ) {
			# Not a lat:lon pair: put back everything that was consumed
			$text .= "GEO $blank$geo$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "coordinates={$num}" ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2406
# Return an HTML link for the "RFC 1234" text.
#
# Every "RFC " in $text that is followed (after optional blanks) by digits is
# turned into an external link. The link target is the 'rfcurl' message with
# $1 replaced by the number. Anything else is left untouched.
#
# $text - text to scan
# Returns the text with the links inserted.
/* private */ function magicRFC( $text ) {
	# explode() replaces the regex-based split(), which no longer exists in
	# current PHP; the separator is a plain string
	$a = explode( 'RFC ', ' '.$text );
	if ( count( $a ) < 2 ) {
		return $text;
	}
	# Drop the blank that was prepended above
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# Leading blanks, then the longest run of digits. strspn() copes with
		# an empty or exhausted string, unlike per-character $x{0} scanning.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( '' === $rfc ) {
			# No number follows: restore the original text
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
2439
# Transforms wiki markup before it is saved and returns the altered markup.
#
# Sets up the parser for OT_WIKI output, converts CRLF line ends to LF, and
# runs pstPass2() on the text while the sections taken out by strip() are
# protected; those sections are put back afterwards.
#
# $text       - wiki markup as submitted
# $title      - title object, stored by reference in $this->mTitle
# $user       - user object, handed on to pstPass2()
# $options    - parser options, stored in $this->mOptions
# $clearState - when true, clearState() is called first
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line ends.
	# (A regex pass that would also normalise <br> variants is currently
	# disabled and therefore not applied here.)
	$text = str_replace( "\r\n", "\n", $text );

	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );

	return $this->unstripNoWiki( $text, $stripState );
}
2468
# Second pass of the pre-save transform.
#
# 1) Variable replacement via replaceVariables()
# 2) Signatures: ~~~~~ becomes the date, ~~~~ a user link plus the date,
#    ~~~ a user link only
# 3) Context links: [[|name]], [[name (context)|]], [[namespace:page|]] and
#    [[ns:page (cont)|]] are expanded to full piped links
# 4) Trailing whitespace is trimmed; the MAG_END magic word is removed
#
# $text - wiki markup
# $user - user whose name and 'nickname' option are used for the signature
# Returns the transformed markup.
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	# Note: this is an ugly timezone hack for the European wikis.
	# TZ is switched to the configured local timezone only while the
	# signature date is formatted.
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the value saved above
		putenv( 'TZ=' . $oldtz );
	}

	# Plain string replacement, longest marker first. A regex replacement
	# would interpret "$1" or "\1" inside a user name or nickname as a
	# backreference and mangle the signature.
	$userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
		# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2536
# Prepares the parser for use by an external function: stores the title (by
# reference), the options and the output type, which are usually set up in
# parse(), so that class members can be called with confidence afterwards.
# The parser state is reset via clearState() unless $clearState is false.
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2547
# Runs variable replacement on a message text in OT_MSG mode, using $wgTitle
# as the title and $options as the parser options. The parser state is cleared
# first. A call made while another call is still running returns $text
# unchanged.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion: only the outermost call does any work
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
2567
# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
# The callback will be called with the text within the tag and should
# transform and return it.
# Returns the callback previously registered for $tag, or null if none was.
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;

	return $previous;
}
2576 }
2577
# Holds the result of a parse: the output text plus the language links,
# the category links and the "contains old magic" flag collected on the way.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor; the cache time starts out as an empty string
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Merges the link lists and the old-magic flag of another ParserOutput
	# into this one. The text and the cache time are not touched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Take the categories from $other, not this object's language links
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2611
# Bundle of the settings that influence a parse. The values are taken from
# site-wide globals and from the preferences of a user.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                   # Use texvc to expand <math> tags
	var $mUseCategoryMagic;         # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;          # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;           # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;      # Allow external images inline
	var $mSkin;                     # Reference to the preferred skin
	var $mDateFormat;               # Date format index
	var $mEditSection;              # Create "edit section" links
	var $mEditSectionOnRightClick;  # Generate JavaScript to edit section on right click
	var $mNumberHeadings;           # Automatically number headings
	var $mShowToc;                  # Show table of contents

	# Read accessors

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# Write accessors; each hands back the result of wfSetVar()

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is kept by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Get parser options
	# Builds a new ParserOptions object initialised from $user
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	# Fills this object from the site-wide globals and from the preferences of
	# $userInput. If $userInput is empty, a new User object marked as loaded
	# is used instead.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user settings
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
	}


}
2685
# Regex callback, used in Parser::replaceVariables
# Forwards the match array to braceSubstitution() of the parser held in
# $wgCurParser and returns its result.
function wfBraceSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2691
# Regex callback: forwards the match array to argSubstitution() of the parser
# held in $wgCurParser and returns its result.
function wfArgSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2696
# Regex callback: forwards the match array to variableSubstitution() of the
# parser held in $wgCurParser and returns its result.
function wfVariableSubstitution( $matches ) {
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2701
# Return the total number of articles
# Calls wfLoadSiteStats() first and then reads $wgNumberOfArticles.
function wfNumberOfArticles() {
	wfLoadSiteStats();

	return $GLOBALS['wgNumberOfArticles'];
}
2709
# Get various statistics from the database
# Reads row 1 of the site_stats table and copies its columns into
# $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles. Nothing is done when
# $wgNumberOfArticles is not -1 (presumably the "not loaded yet" marker --
# confirm where the global is initialised) or when the row cannot be read.
/* private */ function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
	$fname = 'wfLoadSiteStats';

	if ( $wgNumberOfArticles == -1 ) {
		$columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
		$conditions = array( 'ss_row_id' => 1 );

		$dbr =& wfGetDB( DB_SLAVE );
		$row = $dbr->getArray( 'site_stats', $columns, $conditions, $fname );

		if ( $row !== false ) {
			$wgTotalViews = $row->ss_total_views;
			$wgTotalEdits = $row->ss_total_edits;
			$wgNumberOfArticles = $row->ss_good_articles;
		}
	}
}
2730
# Escapes only the characters that delimit HTML tags and attribute values:
# double quote, greater-than and less-than. Ampersands are left alone, so
# existing entities in $in stay intact.
function wfEscapeHTMLTagsOnly( $in ) {
	$entities = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);

	return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2737
2738
2739 ?>