BUG#1402 Make link color of tab subject page link on talk page indicate whether artic...
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 /**
3 *
4 * @package MediaWiki
5 * @subpackage Experimental
6 */
7
8 /** */
9 require_once ( "Parser.php" ) ;
10
11 /**
12 * This should one day become the XML->(X)HTML parser
13 * Based on work by Jan Hidders and Magnus Manske
14 * To use, set
15 * $wgUseXMLparser = true ;
16 * $wgEnableParserCache = false ;
17 * $wgWiki2xml to the path and executable of the command line version (cli)
18 * in LocalSettings.php
19 * @package MediaWiki
20 * @subpackage Experimental
21 */
22
23 /**
24 * the base class for an element
25 * @package MediaWiki
26 * @subpackage Experimental
27 */
/**
 * One node of the wikiXML tree built by the xml2php handlers.
 *
 * A node has a tag name, the XML attributes of its start tag and an ordered
 * list of children; each child is either a plain string (character data) or
 * another element. makeXHTML() renders the subtree rooted at the node.
 *
 * NOTE(review): string children are appended to the output verbatim (see
 * sub_makeXHTML()); whether they are already HTML-safe depends on the
 * external wiki2xml tool - confirm before exposing this to untrusted input.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class element {
	/** Tag name of the node as stored by wgXMLstartElement() */
	var $name = '';
	/** XML attributes of the start tag (attribute name => value) */
	var $attrs = array();
	/** Ordered children: strings (text) and element objects */
	var $children = array();

	/**
	 * Find the ATTRS child and return its ATTR sub-children as one
	 * attribute string. If there are several ATTRS children the last wins.
	 *
	 * @return string e.g. "border='1' class='x'", or '' without ATTRS child
	 */
	function getSourceAttrs ()
	{
		$ret = '' ;
		foreach ( $this->children as $child ) {
			if ( !is_string ( $child ) AND $child->name == 'ATTRS' ) {
				# makeXHTML() on an ATTRS node only delegates to getTheseAttrs();
				# calling it directly avoids passing a parser that does not exist here.
				$ret = $child->getTheseAttrs () ;
			}
		}
		return $ret ;
	}

	/**
	 * Collect the ATTR children of this (ATTRS) node for getSourceAttrs().
	 *
	 * Each ATTR child becomes name='value'. The value is the first child
	 * of the ATTR node if that is a string, otherwise it is empty. Single
	 * quotes in the value are replaced by &#39; so the value cannot end the
	 * quoted attribute early. ATTR nodes without a NAME are skipped.
	 *
	 * NOTE(review): attribute names are not checked against a whitelist.
	 *
	 * @return string space-separated attribute list
	 */
	function getTheseAttrs () {
		$ret = array() ;
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) OR $child->name != 'ATTR' ) continue ;
			if ( !isset ( $child->attrs["NAME"] ) ) continue ;

			$value = '' ;
			if ( isset ( $child->children[0] ) AND is_string ( $child->children[0] ) ) {
				$value = $child->children[0] ;
			}
			$ret[] = $child->attrs["NAME"] . "='" . str_replace ( "'" , '&#39;' , $value ) . "'" ;
		}
		return implode ( ' ' , $ret ) ;
	}

	/**
	 * Move a "link tail" into the preceding link.
	 *
	 * If child $key is a LINK element and child $key+1 is a string, the
	 * leading run of a-z / ä / ö / ü / ß is cut off that string and added
	 * to the link text: appended to the last child of the link when the
	 * link has more than one child, otherwise stored in a new LINKOPTION
	 * child holding the rendered link target plus the tail.
	 *
	 * Currently not called (the call in sub_makeXHTML() is commented out).
	 *
	 * @param object $parser passed through to sub_makeXHTML()
	 * @param mixed  $key    index of the candidate LINK child
	 */
	function fixLinkTails ( &$parser , $key ) {
		$k2 = $key + 1 ;
		if ( !isset ( $this->children[$k2] ) ) return ;
		if ( !is_string ( $this->children[$k2] ) ) return ;
		if ( !isset ( $this->children[$key] ) ) return ;
		if ( is_string ( $this->children[$key] ) ) return ;
		if ( $this->children[$key]->name != "LINK" ) return ;

		# Match the tail as whole byte sequences: comparing a single byte
		# ($n[0]) with a multi-byte character can never succeed.
		$n = $this->children[$k2] ;
		if ( !preg_match ( '/^(?:[a-z]|ä|ö|ü|ß)+/' , $n , $m ) ) return ; # No tail, nothing to move
		$s = $m[0] ;

		# Cast: substr() returns false on old PHP when the tail is the whole string
		$this->children[$k2] = (string) substr ( $n , strlen ( $s ) ) ;

		if ( count ( $this->children[$key]->children ) > 1 ) {
			$kl = array_keys ( $this->children[$key]->children ) ;
			$kl = array_pop ( $kl ) ;
			$this->children[$key]->children[$kl]->children[] = $s ;
		} else {
			$e = new element ;
			$e->name = "LINKOPTION" ;
			$t = $this->children[$key]->sub_makeXHTML ( $parser ) ;
			$e->children[] = trim ( $t ) . $s ;
			$this->children[$key]->children[] = $e ;
		}
	}

	/**
	 * Generate the XHTML for the entire subtree.
	 *
	 * @param object $parser passed on to the children
	 * @param string $tag    HTML tag to wrap the children in; '' for no wrapper
	 * @param string $attr   attributes for the tag; the attributes from an
	 *                       ATTRS child are appended to them
	 * @return string
	 */
	function sub_makeXHTML ( &$parser , $tag = '' , $attr = '' ){
		$ret = '' ;

		$attr2 = $this->getSourceAttrs () ;
		if ( $attr != '' AND $attr2 != '' ) $attr .= ' ' ;
		$attr .= $attr2 ;

		if ( $tag != "" )
		{
			$ret .= "<" . $tag ;
			if ( $attr != "" ) $ret .= " " . $attr ;
			$ret .= ">" ;
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		# foreach ( array_keys ( $this->children ) AS $x )
		#	$this->fixLinkTails ( $parser , $x ) ;

		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				$ret .= $child ; # Text is copied as is
			} else if ( $child->name != "ATTRS" ) { # ATTRS went into the start tag above
				$ret .= $child->makeXHTML ( $parser ) ;
			}
		}
		if ( $tag != "" )
			$ret .= "</" . $tag . ">\n" ;
		return $ret ;
	}

	/**
	 * Build the HTML for an internal link.
	 *
	 * A target with a ":" prefix that resolves to the image namespace is
	 * rendered through the skin's makeImageLinkObj(), unless the node has
	 * FORCEDLINK="yes". Everything else goes through the skin's makeLink().
	 * Interwiki/language and other namespace handling is not implemented.
	 *
	 * @param object $parser        unused
	 * @param string $target        link target
	 * @param string $display_title link text
	 * @param array  $options       remaining link options (used for images)
	 * @return string
	 */
	function createInternalLink ( &$parser , $target , $display_title , $options )
	{
		global $wgUser ;
		$skin = $wgUser->getSkin() ;

		$nt = Title::newFromText ( $target ) ; # Not an object for invalid titles
		$forced = isset ( $this->attrs["FORCEDLINK"] )
			&& strtoupper ( $this->attrs["FORCEDLINK"] ) == "YES" ;
		$hasPrefix = count ( explode ( ":" , $target ) ) > 1 ;

		if ( !$forced && $hasPrefix && is_object ( $nt ) && $nt->getNamespace() == NS_IMAGE ) {
			$options[] = $display_title ;
			return $skin->makeImageLinkObj ( $nt , implode ( "|" , $options ) ) ;
		}

		return $skin->makeLink ( $target , $display_title ) ;
	}

	/**
	 * Render a LINK node: the LINKTARGET child is the target, all other
	 * element children are options, the last of which is the link text.
	 * Without options the target doubles as link text.
	 *
	 * @param object $parser
	 * @return string
	 */
	function makeInternalLink ( &$parser )
	{
		$target = "" ;
		$option = array () ;
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				# This shouldn't be the case!
			} else {
				if ( $child->name == "LINKTARGET" )
					$target = trim ( $child->makeXHTML ( $parser ) ) ;
				else
					$option[] = trim ( $child->makeXHTML ( $parser ) ) ;
			}
		}

		if ( count ( $option ) == 0 ) $option[] = $target ; # Create dummy display title
		$display_title = array_pop ( $option ) ;
		return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
	}

	/**
	 * Expand a template call.
	 *
	 * A title without ":" is looked up in the template namespace. A title
	 * that cannot be turned into a Title object is returned as escaped
	 * {{...}} text; a page that does not exist becomes a broken link.
	 * Otherwise the page text is run through $parser->plain_parse() with
	 * the template parameters: "key=value" parts are stored under key,
	 * other parts under their 1-based position.
	 *
	 * @param string $title  template name as written
	 * @param array  $parts  the "|"-separated parameters
	 * @param object $parser parser used to render the template text
	 * @return string
	 */
	function getTemplateXHTML ( $title , $parts , &$parser ) {
		global $wgLang , $wgUser ;
		$skin = $wgUser->getSkin() ;
		$ot = $title ; # Original title
		if ( count ( explode ( ":" , $title ) ) == 1 )
			$title = $wgLang->getNsText ( NS_TEMPLATE ) . ":" . $title ;
		$nt = Title::newFromText ( $title ) ;
		if ( !is_object ( $nt ) ) { # Invalid title, show the call as text
			return htmlspecialchars ( '{{' . $ot . '}}' ) ;
		}
		$id = $nt->getArticleID() ;
		if ( $id == 0 ) { # No/non-existing page
			return $skin->makeBrokenLink ( $title , $ot ) ;
		}

		$a = 0 ;
		$tv = array () ; # Template variables
		foreach ( $parts AS $part ) {
			$a++ ;
			$x = explode ( "=" , $part , 2 ) ;
			if ( count ( $x ) == 1 ) $key = "{$a}" ;
			else $key = $x[0] ;
			$value = array_pop ( $x ) ;
			$tv[$key] = $value ;
		}
		$art = new Article ( $nt ) ;
		$text = $art->getContent ( false ) ;
		$parser->plain_parse ( $text , true , $tv ) ;

		return $text ;
	}

	/**
	 * This function actually converts wikiXML into XHTML tags.
	 *
	 * EXTENSION nodes whose NAME is a known HTML tag are handled like the
	 * matching wiki element. The result is wrapped in newlines with double
	 * newlines collapsed once, except for ATTRS nodes, which return their
	 * attribute string unchanged.
	 *
	 * @param object $parser provides nowiki, mListType and
	 *                       mCurrentTemplateOptions
	 * @return string
	 */
	function makeXHTML ( &$parser )
	{
		$ret = "" ;
		$n = $this->name ; # Shortcut

		if ( $n == "EXTENSION" ) # Fix allowed HTML
		{
			$htmlToWiki = array (
				"B" => "BOLD" , "STRONG" => "BOLD" ,
				"I" => "ITALICS" , "EM" => "ITALICS" ,
				"U" => "UNDERLINED" , # Hey, virtual wiki tag! ;-)
				"S" => "STRIKE" ,
				"P" => "PARAGRAPH" ,
				"TABLE" => "TABLE" ,
				"TR" => "TABLEROW" ,
				"TD" => "TABLECELL" ,
				"TH" => "TABLEHEAD" ,
				"CAPTION" => "CAPTION" ,
				"NOWIKI" => "NOWIKI" ,
			) ;
			$ext = isset ( $this->attrs["NAME"] ) ? strtoupper ( $this->attrs["NAME"] ) : "" ;
			if ( isset ( $htmlToWiki[$ext] ) ) {
				$n = $htmlToWiki[$ext] ;
				unset ( $this->attrs["NAME"] ) ; # Cleanup
			} else if ( !empty ( $parser->nowiki ) AND $parser->nowiki > 0 ) {
				$n = "" ; # No "real" wiki tags allowed in nowiki section
			}
		}

		# Elements that are just their children inside one HTML tag
		$plainTags = array (
			"PARAGRAPH" => "p" ,
			"BOLD" => "strong" ,
			"ITALICS" => "em" ,
			"UNDERLINED" => "u" , # These two don't exist as wiki markup
			"STRIKE" => "strike" ,
			"TABLE" => "table" ,
			"TABLEROW" => "tr" ,
			"TABLECELL" => "td" ,
			"TABLEHEAD" => "th" ,
			"CAPTION" => "caption" ,
		) ;
		# Elements rendered as their children without a wrapper
		# (IGNORE is for internal use, not generated by the wiki2xml parser)
		$passThrough = array ( "ARTICLE" => 1 , "LINKTARGET" => 1 , "LINKOPTION" => 1 , "IGNORE" => 1 ) ;

		if ( $n == "ATTRS" ) # SPECIAL CASE : returning attributes
		{
			return $this->getTheseAttrs () ;
		}

		if ( isset ( $plainTags[$n] ) )
		{
			$ret .= $this->sub_makeXHTML ( $parser , $plainTags[$n] ) ;
		}
		else if ( isset ( $passThrough[$n] ) )
		{
			$ret .= $this->sub_makeXHTML ( $parser ) ;
		}
		else if ( $n == "HEADING" )
		{
			# The level becomes part of a tag name, so force it to 1..6
			$level = isset ( $this->attrs["LEVEL"] ) ? intval ( $this->attrs["LEVEL"] ) : 1 ;
			$level = max ( 1 , min ( 6 , $level ) ) ;
			$ret .= $this->sub_makeXHTML ( $parser , "h" . $level ) ;
		}
		else if ( $n == "COMMENT" ) # HTML comment
		{
			$ret .= "" ; # Comments are parsed out
		}
		else if ( $n == "LINK" )
		{
			$ret .= $this->makeInternalLink ( $parser ) ;
		}
		else if ( $n == "TEMPLATE" )
		{
			$parts = $this->sub_makeXHTML ( $parser ) ;
			$parts = explode ( "|" , $parts ) ;
			$title = array_shift ( $parts ) ;
			# getTemplateXHTML() declares the parameter as a reference itself;
			# an "&" at the call is a parse error as of PHP 5.4.
			$ret .= $this->getTemplateXHTML ( $title , $parts , $parser ) ;
		}
		else if ( $n == "TEMPLATEVAR" )
		{
			$x = $this->sub_makeXHTML ( $parser ) ;
			if ( isset ( $parser->mCurrentTemplateOptions["{$x}"] ) )
				$ret .= $parser->mCurrentTemplateOptions["{$x}"] ;
		}
		else if ( $n == "NOWIKI" )
		{
			$depth = isset ( $parser->nowiki ) ? $parser->nowiki : 0 ;
			$parser->nowiki = $depth + 1 ;
			$ret .= $this->sub_makeXHTML ( $parser , "" ) ;
			$parser->nowiki = $depth ;
		}
		else if ( $n == "EXTENSION" ) # Unknown HTML extension. This is currently a dummy!!!
		{
			$ext = isset ( $this->attrs["NAME"] ) ? htmlspecialchars ( $this->attrs["NAME"] ) : "" ;

			$ret .= "&lt;" . $ext . "&gt;" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "&lt;/" . $ext . "&gt; " ;
		}
		else if ( $n == "LISTITEM" )
		{
			if ( $parser->mListType == "dl" ) $ret .= $this->sub_makeXHTML ( $parser , "dd" ) ;
			else $ret .= $this->sub_makeXHTML ( $parser , "li" ) ;
		}
		else if ( $n == "LIST" )
		{
			$listKind = isset ( $this->attrs["TYPE"] ) ? $this->attrs["TYPE"] : "" ;
			$type = "ol" ; # Default
			if ( $listKind == "bullet" ) $type = "ul" ;
			else if ( $listKind == "indent" ) $type = "dl" ;
			# The items need to know their list; restore it for the outer list afterwards
			$oldtype = $parser->mListType ;
			$parser->mListType = $type ;
			$ret .= $this->sub_makeXHTML ( $parser , $type ) ;
			$parser->mListType = $oldtype ;
		}
		else # Something else entirely
		{
			$ret .= "&lt;" . $n . "&gt;" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "&lt;/" . $n . "&gt; " ;
		}

		$ret = "\n{$ret}\n" ;
		$ret = str_replace ( "\n\n" , "\n" , $ret ) ;
		return $ret ;
	}

	/**
	 * A function for additional debugging output: the subtree as nested
	 * HTML lists. Names, attributes and text are escaped so that markup
	 * inside them shows up as text.
	 *
	 * @return string
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> " . htmlspecialchars ( $this->name ) . " </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ( $this->attrs as $name => $value ) {
			$ret .= htmlspecialchars ( $name ) . " => " . htmlspecialchars ( $value ) . "; " ;
		}
		$ret .= " </li>\n";
		// print children
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				$ret .= "<li> " . htmlspecialchars ( $child ) . " </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
387
// Stack of the elements whose start tag has been seen but whose end tag has
// not yet (the ancestors of the node currently being parsed). It is shared,
// via "global", by the three XML handler functions that follow.
$ancStack = array();
389
390 // START Three global functions needed for parsing, sorry guys
391 /** @todo document */
/**
 * Start-tag handler (registered through xml_set_element_handler()).
 * Creates a node for the tag that was just opened and puts it on top of
 * the global ancestor stack.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   tag name
 * @param array  $attrs  attributes of the tag
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$node = new element;
	$node->attrs = $attrs;
	$node->name = $name;

	$ancStack[] = $node;
}
401
402 /** @todo document */
/**
 * End-tag handler (registered through xml_set_element_handler()).
 * Takes the finished node off the global ancestor stack. If the stack is
 * empty afterwards the node is stored in the global $rootElem, otherwise
 * it is appended to the children of the node that is now on top.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   tag name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	$depth = count($ancStack);

	if ($depth == 0) {
		// nothing left above it: this is the root
		$rootElem = $finished;
		return;
	}

	// hand it over to its parent
	$ancStack[$depth - 1]->children[] = $finished;
}
413
414 /** @todo document */
/**
 * Character-data handler (registered through
 * xml_set_character_data_handler()). The text is trimmed; if anything is
 * left and an element is open, it is appended to the children of the
 * element on top of the global ancestor stack.
 *
 * NOTE(review): because every piece of text is trimmed on its own,
 * whitespace at the edges of a piece is dropped - confirm this is wanted
 * for text next to inline elements.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $data   the text
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Blank text is of no use
	$text = trim($data);
	if ($text == "") {
		return;
	}

	// No open element, no parent to attach the text to
	if (!$ancStack) {
		return;
	}

	$top = count($ancStack) - 1;
	$ancStack[$top]->children[] = $text;
}
423 // END Three global functions needed for parsing, sorry guys
424
425 /**
426 * Here's the class that generates a nice tree
427 * @package MediaWiki
428 * @subpackage Experimental
429 */
/**
 * Here's the class that generates a nice tree: it runs PHP's XML parser
 * with the wgXML* handler functions and returns the root node they leave
 * in the global $rootElem.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class xml2php {

	/**
	 * Reset the global parse state and create an XML parser that has the
	 * wgXML* handler functions registered.
	 *
	 * $rootElem is reset as well so that a parse which produces no root
	 * cannot return the root of an earlier parse.
	 *
	 * @return mixed the parser as returned by xml_parser_create()
	 */
	function createParser() {
		global $ancStack, $rootElem;
		$ancStack = array();
		$rootElem = null;

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');
		return $xml_parser;
	}

	/**
	 * Terminate the script with the parser's current error and line number.
	 *
	 * @param mixed $xml_parser the parser that reported the error
	 */
	function failWithParseError( $xml_parser ) {
		die(sprintf("XML error: %s at line %d",
			xml_error_string(xml_get_error_code($xml_parser)),
			xml_get_current_line_number($xml_parser)));
	}

	/**
	 * Parse an XML file into a tree of element objects.
	 *
	 * Terminates the script (die) if the file cannot be opened or is not
	 * well-formed XML.
	 *
	 * @param string $filename file to read
	 * @return mixed reference to the global $rootElem: the root node, or
	 *               null if the input contained no element
	 */
	function &scanFile( $filename ) {
		global $rootElem;

		$xml_parser = $this->createParser();
		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}
		while (true) {
			$data = fread($fp, 4096);
			// Compare strictly: a chunk consisting of "0" is data, not the end
			if ($data === false || $data === '') {
				break;
			}
			if (!xml_parse($xml_parser, $data, feof($fp))) {
				fclose($fp);
				$this->failWithParseError($xml_parser);
			}
		}
		fclose($fp);
		xml_parser_free($xml_parser);

		// return the root element the end-tag handler stored
		return $rootElem;
	}

	/**
	 * Parse an XML string into a tree of element objects.
	 *
	 * Terminates the script (die) if the string is not well-formed XML.
	 *
	 * @param string $input the XML text
	 * @return mixed the root node, or null if the input contained no element
	 */
	function scanString ( $input ) {
		global $rootElem;

		$xml_parser = $this->createParser();
		if (!xml_parse ($xml_parser, $input, true)) {
			$this->failWithParseError($xml_parser);
		}
		xml_parser_free ($xml_parser);

		// return the root element the end-tag handler stored
		return $rootElem;
	}

}
477
478 /**
479 * @todo document
480 * @package MediaWiki
481 * @subpackage Experimental
482 */
/**
 * Experimental parser that hands the text to external command line tools
 * (html2xml and wiki2xml), reads the XML they print into a tree of element
 * objects and renders that tree as XHTML.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class ParserXML extends Parser
{
	/**#@+
	 * @access private
	 */
	# Persistent:
	var $mTagHooks, $mListType;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType,
		$mTemplates,	// cache of already loaded templates, avoids
				// multiple SQL queries for the same string
		$mTemplatePath;	// stores an unsorted hash of all the templates already loaded
				// in this path. Used for loop detection.

	var $nowikicount , $mCurrentTemplateOptions ;

	# Nesting depth of NOWIKI sections; read and written by element::makeXHTML()
	var $nowiki = 0 ;

	/**#@-*/

	/**
	 * Constructor
	 *
	 * @access public
	 */
	function __construct() {
		$this->mTemplates = array();
		$this->mTemplatePath = array();
		$this->mTagHooks = array();
		$this->clearState();
	}

	/**
	 * Old-style constructor, kept for PHP versions and callers that still
	 * use a constructor named after the class.
	 *
	 * @access public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * @access private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
	}

	/**
	 * Run an external command with $input on its standard input and
	 * return what it printed, lines joined with "\n".
	 *
	 * The input goes through a temporary file in /tmp which is removed
	 * afterwards. Returns '' if the temporary file cannot be created.
	 *
	 * @param string $command command line to execute (from configuration)
	 * @param string $input   text to feed to the command
	 * @return string
	 */
	function runExternalFilter ( $command , $input ) {
		$tmpfname = tempnam ( "/tmp" , "FOO" ) ;
		if ( $tmpfname === false ) return '' ;

		$handle = fopen ( $tmpfname , "w" ) ;
		if ( !$handle ) {
			unlink ( $tmpfname ) ;
			return '' ;
		}
		fwrite ( $handle , $input ) ;
		fclose ( $handle ) ;

		$lines = array () ;
		exec ( $command . " < " . escapeshellarg ( $tmpfname ) , $lines ) ;
		unlink ( $tmpfname ) ;

		return implode ( "\n" , $lines ) ;
	}

	/**
	 * Pipe the text through the external "html2xml" tool, which is looked
	 * for in the directory of $wgWiki2xml. The text is UTF-8 encoded for
	 * the tool and its output is decoded again.
	 *
	 * NOTE(review): presumably this turns HTML in the text into XML
	 * (judging by the tool's name) - confirm against the tool.
	 *
	 * @param string $text in: the text, out: the tool's output
	 */
	function html2xml ( &$text ) {
		global $wgWiki2xml ;

		# generating html2xml command path
		$a = explode ( "/" , $wgWiki2xml ) ;
		array_pop ( $a ) ;
		$a[] = "html2xml" ;
		$html2xml = implode ( "/" , $a ) ;

		$text = utf8_decode ( $this->runExternalFilter ( $html2xml , utf8_encode ( $text ) ) ) ;
	}

	/**
	 * Turns the wikitext into XML by calling the external parser:
	 * first html2xml(), then the $wgWiki2xml command, whose output is
	 * UTF-8 decoded.
	 *
	 * @param string $text in: wikitext, out: the XML
	 */
	function runXMLparser ( &$text ) {
		global $wgWiki2xml ;

		$this->html2xml ( $text ) ;
		$text = utf8_decode ( $this->runExternalFilter ( $wgWiki2xml , $text ) ) ;
	}

	/**
	 * Convert wikitext to XHTML in place.
	 *
	 * Called for the page itself and again, through
	 * element::getTemplateXHTML(), for every template, which is why the
	 * template options are saved and restored around the rendering.
	 *
	 * @param string $text            in: wikitext, out: XHTML
	 * @param bool   $inline          if true and the root has exactly one
	 *                                child element, that child is renamed
	 *                                to IGNORE so it renders without its
	 *                                own tag
	 * @param array  $templateOptions values for TEMPLATEVAR nodes
	 */
	function plain_parse ( &$text , $inline = false , $templateOptions = array () ) {
		$this->runXMLparser ( $text ) ;
		$w = new xml2php;
		$result = $w->scanString( $text );
		if ( !is_object ( $result ) ) { # Nothing was parsed
			$text = '' ;
			return ;
		}

		$oldTemplateOptions = $this->mCurrentTemplateOptions ;
		$this->mCurrentTemplateOptions = $templateOptions ;

		if ( $inline ) { # Inline rendering off for templates
			if ( count ( $result->children ) == 1
				&& isset ( $result->children[0] )
				&& is_object ( $result->children[0] ) )
				$result->children[0]->name = "IGNORE" ;
		}

		# For debugging, append: . "<hr>" . $text . "<hr>" . $result->myPrint()
		$text = $result->makeXHTML ( $this ) ;
		$this->mCurrentTemplateOptions = $oldTemplateOptions ;
	}

	/**
	 * Parse wikitext and return the parser output object with the XHTML
	 * set as its text.
	 *
	 * @param string $text       wikitext
	 * @param object $title      title of the page; stored in mTitle
	 * @param object $options    parser options; stored in mOptions
	 * @param bool   $linestart  unused
	 * @param bool   $clearState if true (default) clearState() is called
	 *                           first, so each parse gets a fresh output
	 *                           object
	 * @return object $this->mOutput
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
		if ( $clearState ) {
			$this->clearState();
		}
		$this->mOptions = $options;
		$this->mTitle =& $title;

		$this->plain_parse ( $text ) ;
		$this->mOutput->setText ( $text ) ;
		return $this->mOutput;
	}

}
602 ?>