Use AutoLoader to load classes:
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 /**
3 *
4 * @package MediaWiki
5 * @subpackage Experimental
6 */
7
8 /**
9 * This should one day become the XML->(X)HTML parser
10 * Based on work by Jan Hidders and Magnus Manske
11 * To use, set
12 * $wgUseXMLparser = true ;
13 * $wgEnableParserCache = false ;
14 * $wgWiki2xml to the path and executable of the command line version (cli)
15 * in LocalSettings.php
16 * @package MediaWiki
17 * @subpackage Experimental
18 */
19
20 /**
21 * the base class for an element
22 * @package MediaWiki
23 * @subpackage Experimental
24 */
class element {
	/** Upper-cased XML element name as delivered by the XML parser, e.g. 'PARAGRAPH' */
	var $name = '';
	/** Attributes of the XML element, keyed by attribute name */
	var $attrs = array ();
	/** Child nodes in document order: character data as strings, sub-elements as element objects */
	var $children = array ();

	/**
	 * Finds the ATTRS child element and returns its ATTR sub-children as a
	 * single attribute string. If several ATTRS children exist, the last wins.
	 *
	 * The ATTRS child is rendered via getTheseAttrs() directly; going through
	 * makeXHTML() would require a parser object this method does not have.
	 *
	 * @return string Attribute string, '' if there is no ATTRS child
	 */
	function getSourceAttrs() {
		$ret = '';
		foreach ($this->children as $child) {
			if (!is_string($child) && $child->name == 'ATTRS') {
				$ret = $child->getTheseAttrs();
			}
		}
		return $ret;
	}

	/**
	 * Collects the ATTR children of this (ATTRS) element for getSourceAttrs().
	 *
	 * Each ATTR child becomes name='value'. The value is the concatenation of
	 * all character-data children (the XML parser may deliver one text run in
	 * several chunks) and is HTML-escaped so that a quote inside the value
	 * cannot terminate the attribute. ATTR children without a NAME are skipped.
	 *
	 * @return string Space-separated attribute list
	 */
	function getTheseAttrs() {
		$ret = array ();
		foreach ($this->children as $child) {
			if (is_string($child) || $child->name != 'ATTR' || !isset ($child->attrs['NAME'])) {
				continue;
			}
			$value = '';
			foreach ($child->children as $part) {
				if (is_string($part)) {
					$value .= $part;
				}
			}
			$ret[] = $child->attrs['NAME']."='".htmlspecialchars($value, ENT_QUOTES, 'UTF-8')."'";
		}
		return implode(' ', $ret);
	}

	/**
	 * Moves a "link tail" (lower-case letters directly following a link, as in
	 * [[link]]s) from the text node after the LINK child into the link itself.
	 *
	 * Nothing happens unless child $key is a LINK element, child $key+1 is a
	 * string, and that string starts with at least one tail character
	 * (a-z and the UTF-8 encoded German letters ae/oe/ue umlaut and sharp s).
	 *
	 * @param object $parser Parser, only handed on to sub_makeXHTML()
	 * @param int $key Index of the candidate LINK child in $this->children
	 */
	function fixLinkTails(& $parser, $key) {
		$k2 = $key +1;
		if (!isset ($this->children[$k2]) || !is_string($this->children[$k2]))
			return;
		if (!isset ($this->children[$key]) || is_string($this->children[$key]))
			return;
		if ($this->children[$key]->name != "LINK")
			return;

		# Byte-wise match: the multi-byte letters are spelled out as UTF-8
		# sequences, so no single-byte comparison can miss them.
		if (!preg_match('/^(?:[a-z]|\xC3\xA4|\xC3\xB6|\xC3\xBC|\xC3\x9F)+/', $this->children[$k2], $m))
			return;
		$s = $m[0];
		# Cast: substr() returned false instead of '' on old PHP versions
		$this->children[$k2] = (string) substr($this->children[$k2], strlen($s));

		if (count($this->children[$key]->children) > 1) {
			# Link already has a display part: append the tail to the last child
			$kl = array_keys($this->children[$key]->children);
			$kl = array_pop($kl);
			$this->children[$key]->children[$kl]->children[] = $s;
		} else {
			# Only a target: create a display title from the target plus the tail
			$e = new element;
			$e->name = "LINKOPTION";
			$t = $this->children[$key]->sub_makeXHTML($parser);
			$e->children[] = trim($t).$s;
			$this->children[$key]->children[] = $e;
		}
	}

	/**
	 * Generates the XHTML for the entire subtree.
	 *
	 * String children are appended verbatim, element children are rendered
	 * through makeXHTML(); ATTRS children are skipped because they were already
	 * consumed by getSourceAttrs().
	 *
	 * @param object $parser Parser object handed down to the children
	 * @param string $tag Wrapping tag name, '' for no wrapper
	 * @param string $attr Extra attributes for the wrapping tag
	 * @return string
	 */
	function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
		$ret = '';

		$attr2 = $this->getSourceAttrs();
		if ($attr != '' && $attr2 != '')
			$attr .= ' ';
		$attr .= $attr2;

		if ($tag != '') {
			$ret .= '<'.$tag;
			if ($attr != '')
				$ret .= ' '.$attr;
			$ret .= '>';
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		# foreach ( array_keys ( $this->children ) AS $x )
		# $this->fixLinkTails ( $parser , $x ) ;

		foreach ($this->children as $child) {
			if (is_string($child)) {
				# NOTE(review): character data is emitted unescaped - confirm
				# that the wiki2xml output is trusted to be safe HTML text.
				$ret .= $child;
			} elseif ($child->name != 'ATTRS') {
				$ret .= $child->makeXHTML($parser);
			}
		}
		if ($tag != '')
			$ret .= '</'.$tag.">\n";
		return $ret;
	}

	/**
	 * Link functions
	 *
	 * Renders an internal link. A target containing ':' whose title resolves
	 * to the image namespace is handed to $parser->makeImage() (unless the
	 * FORCEDLINK attribute is 'YES'); everything else becomes a plain skin link.
	 *
	 * @param object $parser Parser providing makeImage()
	 * @param string $target Link target
	 * @param string $display_title Text to display
	 * @param array $options Remaining link options
	 * @return string
	 */
	function createInternalLink(& $parser, $target, $display_title, $options) {
		global $wgUser;
		$skin = $wgUser->getSkin();

		$forced = isset ($this->attrs['FORCEDLINK']) && strtoupper($this->attrs['FORCEDLINK']) == 'YES';

		if (!$forced && strpos($target, ':') !== false) {
			# There's stuff missing here (interlanguage and namespace links)...
			$nt = Title :: newFromText($target);
			# $nt is checked first so that a failed title lookup falls back to
			# a plain link instead of a call on a non-object.
			if ($nt && $nt->getNamespace() == NS_IMAGE) {
				$options[] = $display_title;
				return $parser->makeImage($nt, implode('|', $options));
			}
		}

		return $skin->makeLink($target, $display_title);
	}

	/**
	 * Renders a LINK element: the LINKTARGET child supplies the target, the
	 * last remaining child the display title (defaulting to the target), all
	 * other children are passed on as options.
	 *
	 * @param object $parser
	 * @return string
	 */
	function makeInternalLink(& $parser) {
		$target = '';
		$option = array ();
		foreach ($this->children as $child) {
			if (is_string($child)) {
				# This shouldn't be the case!
				continue;
			}
			if ($child->name == 'LINKTARGET') {
				$target = trim($child->makeXHTML($parser));
			} else {
				$option[] = trim($child->makeXHTML($parser));
			}
		}

		if (count($option) == 0)
			$option[] = $target; # Create dummy display title
		$display_title = array_pop($option);
		return $this->createInternalLink($parser, $target, $display_title, $option);
	}

	/**
	 * Loads a template page and renders it with the given arguments.
	 *
	 * @param string $title Template title; the template namespace is prepended
	 *   when the title contains no ':'
	 * @param array $parts Raw template arguments, either 'value' (numbered
	 *   from 1 by position) or 'key=value'
	 * @param object $parser Parser providing plain_parse()
	 * @return string Rendered template, or a broken link if the title is
	 *   invalid or the page does not exist
	 */
	function getTemplateXHTML($title, $parts, & $parser) {
		global $wgLang, $wgUser;
		$skin = $wgUser->getSkin();
		$ot = $title; # Original title
		if (count(explode(':', $title)) == 1)
			$title = $wgLang->getNsText(NS_TEMPLATE).":".$title;
		$nt = Title :: newFromText($title);
		if (!$nt || $nt->getArticleID() == 0) {
			# Invalid title or no/non-existing page
			return $skin->makeBrokenLink($title, $ot);
		}

		$a = 0;
		$tv = array (); # Template variables
		foreach ($parts AS $part) {
			$a ++;
			$x = explode('=', $part, 2);
			if (count($x) == 1)
				$key = "{$a}";
			else
				$key = $x[0];
			$value = array_pop($x);
			$tv[$key] = $value;
		}
		$art = new Article($nt);
		$text = $art->getContent(false);
		$parser->plain_parse($text, true, $tv);

		return $text;
	}

	/**
	 * Maps the upper-cased name of an HTML tag found as EXTENSION to the wiki
	 * element it is rendered as.
	 *
	 * @private
	 * @param string $ext Upper-cased tag name
	 * @return string Wiki element name, '' if the tag has no wiki equivalent
	 */
	function extensionToWikiName($ext) {
		$map = array (
			'B' => 'BOLD',
			'STRONG' => 'BOLD',
			'I' => 'ITALICS',
			'EM' => 'ITALICS',
			'U' => 'UNDERLINED', # Hey, virtual wiki tag! ;-)
			'S' => 'STRIKE',
			'P' => 'PARAGRAPH',
			'TABLE' => 'TABLE',
			'TR' => 'TABLEROW',
			'TD' => 'TABLECELL',
			'TH' => 'TABLEHEAD',
			'CAPTION' => 'CAPTION',
			'NOWIKI' => 'NOWIKI',
		);
		return isset ($map[$ext]) ? $map[$ext] : '';
	}

	/**
	 * This function actually converts wikiXML into XHTML tags.
	 *
	 * Reads and updates $parser->nowiki (nesting depth of NOWIKI sections)
	 * and $parser->mListType (tag of the innermost list), and reads
	 * $parser->mCurrentTemplateOptions for TEMPLATEVAR elements.
	 *
	 * @param object $parser
	 * @return string XHTML wrapped in single newlines; for ATTRS elements the
	 *   bare attribute string
	 */
	function makeXHTML(& $parser) {
		$ret = '';
		$n = $this->name; # Shortcut

		if ($n == 'EXTENSION') {
			# Fix allowed HTML
			$ext = isset ($this->attrs['NAME']) ? strtoupper($this->attrs['NAME']) : '';
			$wikiName = $this->extensionToWikiName($ext);
			if ($wikiName != '') {
				$n = $wikiName;
				unset ($this->attrs['NAME']); # Cleanup
			} elseif (isset ($parser->nowiki) && $parser->nowiki > 0) {
				# No 'real' wiki tags allowed in nowiki section
				# NOTE(review): '' ends up in the default branch below and is
				# printed as "&lt;&gt;" - confirm that this is intended.
				$n = '';
			}
		} // $n = 'EXTENSION'

		switch($n) {
			case 'ARTICLE':
				$ret .= $this->sub_makeXHTML($parser);
				break;
			case 'HEADING':
				# Clamp to the heading levels XHTML actually has (h1..h6)
				$level = isset ($this->attrs['LEVEL']) ? intval($this->attrs['LEVEL']) : 1;
				$level = max(1, min(6, $level));
				$ret .= $this->sub_makeXHTML($parser, 'h'.$level);
				break;
			case 'PARAGRAPH':
				$ret .= $this->sub_makeXHTML($parser, 'p');
				break;
			case 'BOLD':
				$ret .= $this->sub_makeXHTML($parser, 'strong');
				break;
			case 'ITALICS':
				$ret .= $this->sub_makeXHTML($parser, 'em');
				break;

			# These don't exist as wiki markup
			case 'UNDERLINED':
				$ret .= $this->sub_makeXHTML($parser, 'u');
				break;
			case 'STRIKE':
				$ret .= $this->sub_makeXHTML($parser, 'strike');
				break;

			# HTML comment
			case 'COMMENT':
				# Comments are parsed out
				break;

			# Links
			case 'LINK':
				$ret .= $this->makeInternalLink($parser);
				break;
			case 'LINKTARGET':
			case 'LINKOPTION':
				$ret .= $this->sub_makeXHTML($parser);
				break;

			case 'TEMPLATE':
				$parts = $this->sub_makeXHTML($parser);
				$parts = explode('|', $parts);
				$title = array_shift($parts);
				# The reference is declared by getTemplateXHTML(); marking it
				# again at the call site is a fatal error in current PHP.
				$ret .= $this->getTemplateXHTML($title, $parts, $parser);
				break;

			case 'TEMPLATEVAR':
				$x = $this->sub_makeXHTML($parser);
				if (isset ($parser->mCurrentTemplateOptions["{$x}"]))
					$ret .= $parser->mCurrentTemplateOptions["{$x}"];
				break;

			# Internal use, not generated by wiki2xml parser
			case 'IGNORE':
				$ret .= $this->sub_makeXHTML($parser);
				break;

			case 'NOWIKI':
				$parser->nowiki = isset ($parser->nowiki) ? $parser->nowiki + 1 : 1;
				$ret .= $this->sub_makeXHTML($parser, '');
				$parser->nowiki--;
				break;

			# Unknown HTML extension
			case 'EXTENSION': # This is currently a dummy!!!
				$ext = isset ($this->attrs['NAME']) ? htmlspecialchars($this->attrs['NAME'], ENT_QUOTES, 'UTF-8') : '';

				$ret .= '&lt;'.$ext.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$ext.'&gt; ';
				break;

			# Table stuff
			case 'TABLE':
				$ret .= $this->sub_makeXHTML($parser, 'table');
				break;
			case 'TABLEROW':
				$ret .= $this->sub_makeXHTML($parser, 'tr');
				break;
			case 'TABLECELL':
				$ret .= $this->sub_makeXHTML($parser, 'td');
				break;
			case 'TABLEHEAD':
				$ret .= $this->sub_makeXHTML($parser, 'th');
				break;
			case 'CAPTION':
				$ret .= $this->sub_makeXHTML($parser, 'caption');
				break;
			case 'ATTRS': # SPECIAL CASE : returning attributes
				return $this->getTheseAttrs();

			# Lists stuff
			case 'LISTITEM':
				if ($parser->mListType == 'dl')
					$ret .= $this->sub_makeXHTML($parser, 'dd');
				else
					$ret .= $this->sub_makeXHTML($parser, 'li');
				break;
			case 'LIST':
				$listKind = isset ($this->attrs['TYPE']) ? $this->attrs['TYPE'] : '';
				$type = 'ol'; # Default
				if ($listKind == 'bullet')
					$type = 'ul';
				elseif ($listKind == 'indent')
					$type = 'dl';
				# Remember the enclosing list type so nested lists restore it
				$oldtype = $parser->mListType;
				$parser->mListType = $type;
				$ret .= $this->sub_makeXHTML($parser, $type);
				$parser->mListType = $oldtype;
				break;

			# Something else entirely
			default:
				$ret .= '&lt;'.$n.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$n.'&gt; ';
		} // switch($n)

		$ret = "\n{$ret}\n";
		$ret = str_replace("\n\n", "\n", $ret);
		return $ret;
	}

	/**
	 * A function for additional debugging output: dumps this element and its
	 * subtree as nested HTML lists (name, attributes, then the children).
	 *
	 * @return string
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> $this->name </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ($this->attrs as $name => $value) {
			$ret .= "$name => $value; ";
		}
		$ret .= " </li>\n";
		// print children
		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= "<li> $child </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
429
// Stack of the elements that are currently open while the XML parser walks the
// document: wgXMLstartElement() pushes onto it, wgXMLendElement() pops from it.
$ancStack = array ();
431
432 // START Three global functions needed for parsing, sorry guys
433 /** @todo document */
/**
 * Start-tag handler for the XML parser: creates the element for the tag that
 * was just opened and puts it on top of the global ancestor stack.
 *
 * @param mixed $parser XML parser invoking the handler (unused)
 * @param string $name Tag name
 * @param array $attrs Tag attributes
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$opened = new element;
	$opened->attrs = $attrs;
	$opened->name = $name;

	array_push($ancStack, $opened);
}
443
444 /** @todo document */
/**
 * End-tag handler for the XML parser: takes the finished element off the
 * global ancestor stack and either appends it to its parent (the new top of
 * the stack) or, if the stack is now empty, stores it as the global root.
 *
 * @param mixed $parser XML parser invoking the handler (unused)
 * @param string $name Tag name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	$depth = count($ancStack);

	if ($depth != 0) {
		// still inside another element: attach to that parent
		$ancStack[$depth - 1]->children[] = $finished;
		return;
	}

	// outermost element closed
	$rootElem = $finished;
}
455
456 /** @todo document */
/**
 * Character-data handler for the XML parser: appends the trimmed text to the
 * element on top of the global ancestor stack. Text that is empty after
 * trimming, or that arrives while no element is open, is dropped.
 *
 * @param mixed $parser XML parser invoking the handler (unused)
 * @param string $data Raw character data
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Blank lines are of no use, so surrounding whitespace is removed
	$text = trim($data);

	if (!$ancStack || $text == "") {
		return;
	}

	$top = count($ancStack) - 1;
	$ancStack[$top]->children[] = $text;
}
465 // END Three global functions needed for parsing, sorry guys
466
467 /**
468 * Here's the class that generates a nice tree
469 * @package MediaWiki
470 * @subpackage Experimental
471 */
class xml2php {

	/**
	 * Parses an XML file into a tree, using the global handlers
	 * wgXMLstartElement(), wgXMLendElement() and wgXMLcharacterData().
	 *
	 * The file is read in chunks of 4096 bytes. The script is terminated via
	 * die() if the file cannot be opened or the XML is not well-formed.
	 *
	 * @param string $filename Path of the XML file
	 * @return mixed Root of the tree (the global $rootElem); null if the file
	 *   is empty
	 */
	function & scanFile($filename) {
		global $ancStack, $rootElem;
		$ancStack = array ();
		// Forget the result of any earlier scan so that an empty input cannot
		// hand back a stale tree.
		$rootElem = null;

		$xml_parser = xml_parser_create();
		xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}

		$fed = false;
		while (!feof($fp)) {
			$data = fread($fp, 4096);
			// Strict checks: a chunk consisting of "0" is data, not the end
			if ($data === false || $data === '') {
				break;
			}
			if (!xml_parse($xml_parser, $data, false)) {
				die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
			}
			$fed = true;
		}
		fclose($fp);

		// Finish the document explicitly; deriving the final flag from feof()
		// inside the loop misses it when the size is a multiple of the chunk.
		if ($fed && !xml_parse($xml_parser, '', true)) {
			die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
		}
		xml_parser_free($xml_parser);

		// return the root element stored by wgXMLendElement()
		return $rootElem;
	}

	/**
	 * Parses an XML string into a tree, using the same global handlers as
	 * scanFile(). The script is terminated via die() if the XML is not
	 * well-formed.
	 *
	 * @param string $input Complete XML document
	 * @return mixed Root of the tree (the global $rootElem)
	 */
	function scanString($input) {
		global $ancStack, $rootElem;
		$ancStack = array ();
		$rootElem = null;

		$xml_parser = xml_parser_create();
		xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');

		if (!xml_parse($xml_parser, $input, true)) {
			die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
		}
		xml_parser_free($xml_parser);

		// return the root element stored by wgXMLendElement()
		return $rootElem;
	}

}
515
516 /**
517 * @todo document
518 * @package MediaWiki
519 * @subpackage Experimental
520 */
class ParserXML extends Parser {
	/**#@+
	 * @private
	 */
	# Persistent:
	var $mTagHooks, $mListType;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType,
		$mTemplates, // cache of already loaded templates, avoids
		             // multiple SQL queries for the same string
		$mTemplatePath; // stores an unsorted hash of all the templates already loaded
		                // in this path. Used for loop detection.

	var $nowikicount, $mCurrentTemplateOptions;

	# Nesting depth of NOWIKI sections; element::makeXHTML() increments and
	# decrements it while rendering. ($nowikicount is not used by this file and
	# is only kept so that the set of properties stays unchanged.)
	var $nowiki = 0;

	/**#@-*/

	/**
	 * Constructor
	 *
	 * @public
	 */
	function __construct() {
		$this->mTemplates = array ();
		$this->mTemplatePath = array ();
		$this->mTagHooks = array ();
		$this->clearState();
	}

	/**
	 * Old-style constructor name, kept for code that calls it explicitly.
	 * Declared after __construct() so that PHP never treats it as a second
	 * constructor.
	 *
	 * @public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * @private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array ();
		$this->mStripState = array ();
		$this->mArgStack = array ();
		$this->mInPre = false;
		$this->nowiki = 0;
	}

	/**
	 * Pipes $input through an external command by way of a temporary file
	 * and returns the command's output.
	 *
	 * @private
	 * @param string $command Command line to run; used as configured
	 * @param string $input Data fed to the command on standard input
	 * @return string Output lines joined with "\n"; '' if the temporary file
	 *   could not be created
	 */
	function runExternalFilter($command, $input) {
		$tmpfname = tempnam( wfTempDir(), 'FOO' );
		$handle = ($tmpfname === false) ? false : fopen($tmpfname, 'w');
		if (!$handle) {
			trigger_error('ParserXML: could not create temporary file', E_USER_WARNING);
			if ($tmpfname !== false) {
				unlink($tmpfname);
			}
			return '';
		}
		fwrite($handle, $input);
		fclose($handle);

		$lines = array ();
		// The file name is quoted so that a temp directory containing spaces
		// or shell metacharacters cannot alter the command.
		exec($command.' < '.escapeshellarg($tmpfname), $lines);
		unlink($tmpfname);

		return implode("\n", $lines);
	}

	/**
	 * Runs the text through the external "html2xml" tool, which is expected
	 * in the same directory as the executable configured in $wgWiki2xml.
	 *
	 * @param string $text Text to convert; replaced by the tool's output
	 */
	function html2xml(& $text) {
		global $wgWiki2xml;

		# generating html2xml command path
		$a = explode('/', $wgWiki2xml);
		array_pop($a);
		$a[] = 'html2xml';
		$html2xml = implode('/', $a);

		$text = utf8_decode($this->runExternalFilter($html2xml, utf8_encode($text)));
	}

	/**
	 * Turns the wikitext into XML by calling the external parsers: first
	 * html2xml(), then the executable configured in $wgWiki2xml.
	 *
	 * @param string $text Wikitext; replaced by the resulting XML
	 */
	function runXMLparser(& $text) {
		global $wgWiki2xml;

		$this->html2xml($text);

		# NOTE(review): unlike html2xml(), the input is not utf8_encode()d
		# before the output is utf8_decode()d - confirm this is intended.
		$text = utf8_decode($this->runExternalFilter($wgWiki2xml, $text));
	}

	/**
	 * Converts wikitext to XHTML in place.
	 *
	 * @param string $text Wikitext; replaced by the XHTML
	 * @param bool $inline True when rendering a template: if the document
	 *   consists of a single element, that element's own tag is dropped
	 * @param array $templateOptions Template arguments made available to
	 *   TEMPLATEVAR elements while this text is rendered
	 */
	function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
		$this->runXMLparser($text);
		$w = new xml2php;
		$result = $w->scanString($text);

		// Template rendering re-enters this method, so the options of the
		// enclosing call are saved here and restored below.
		$oldTemplateOptions = $this->mCurrentTemplateOptions;
		$this->mCurrentTemplateOptions = $templateOptions;

		if ($inline && count($result->children) == 1 && is_object($result->children[0])) {
			# Inline rendering off for templates
			$result->children[0]->name = 'IGNORE';
		}

		# For debugging, $result->myPrint() dumps the parsed tree
		$text = $result->makeXHTML($this);
		$this->mCurrentTemplateOptions = $oldTemplateOptions;
	}

	/**
	 * Parses wikitext and returns the parser output object.
	 *
	 * @param string $text Wikitext
	 * @param mixed $title Not used by this parser
	 * @param mixed $options Not used by this parser
	 * @param bool $linestart Not used by this parser
	 * @param bool $clearState Reset the parser state (including a fresh
	 *   output object) before parsing
	 * @return ParserOutput
	 */
	function parse($text, & $title, $options, $linestart = true, $clearState = true) {
		if ($clearState) {
			$this->clearState();
		}
		$this->plain_parse($text);
		$this->mOutput->setText($text);
		return $this->mOutput;
	}

}
?>
640 ?>