Gah, forget this. borked
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 /**
3 *
4 * @package MediaWiki
5 * @subpackage Experimental
6 */
7
8 /** */
9 require_once ('Parser.php');
10
11 /**
12 * This should one day become the XML->(X)HTML parser
13 * Based on work by Jan Hidders and Magnus Manske
14 * To use, set
15 * $wgUseXMLparser = true ;
16 * $wgEnableParserCache = false ;
17 * $wgWiki2xml to the path and executable of the command line version (cli)
18 * in LocalSettings.php
19 * @package MediaWiki
20 * @subpackage Experimental
21 */
22
/**
 * One node of the wikiXML tree built by the XML handlers in this file.
 *
 * $name holds the tag name, $attrs the tag's XML attributes, and $children
 * a list whose entries are either plain strings (character data) or nested
 * element objects. makeXHTML() renders the node and its subtree as XHTML.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class element {
	var $name = '';
	var $attrs = array ();
	var $children = array ();

	/**
	 * Read one XML attribute of this node without triggering an
	 * undefined-index notice.
	 *
	 * @param string $key     attribute name as stored in $attrs
	 * @param mixed  $default value returned when the attribute is not set
	 * @return mixed
	 */
	function getAttr($key, $default = '') {
		return isset ($this->attrs[$key]) ? $this->attrs[$key] : $default;
	}

	/**
	 * This finds the ATTRS element and returns the ATTR sub-children as a single string.
	 *
	 * The ATTRS child is asked for its attribute string directly; rendering it
	 * through makeXHTML() would return exactly the same value but needs a
	 * parser object that is not available here.
	 *
	 * @return string attribute string of the last ATTRS child, '' if there is none
	 */
	function getSourceAttrs() {
		$ret = '';
		foreach ($this->children as $child) {
			if (!is_string($child) AND $child->name == 'ATTRS') {
				$ret = $child->getTheseAttrs();
			}
		}
		return $ret;
	}

	/**
	 * This collects the ATTR thingies for getSourceAttrs()
	 *
	 * NOTE(review): names and values are emitted without HTML escaping.
	 *
	 * @return string space-separated list of name='value' pairs
	 */
	function getTheseAttrs() {
		$ret = array ();
		foreach ($this->children as $child) {
			if (!is_string($child) AND $child->name == 'ATTR') {
				# An ATTR without character data stands for an empty value
				$value = '';
				if (isset ($child->children[0]) AND is_string($child->children[0])) {
					$value = $child->children[0];
				}
				$ret[] = $child->getAttr('NAME')."='".$value."'";
			}
		}
		return implode(' ', $ret);
	}

	/**
	 * Moves a "link tail" (lower-case letters, including the German ä ö ü ß,
	 * that directly follow a LINK child) out of the following text child and
	 * into the display text of that link.
	 *
	 * Does nothing unless child $key is a LINK element and child $key+1 is a
	 * string.
	 *
	 * @param object $parser parser object, handed on to sub_makeXHTML()
	 * @param int    $key    index into $this->children of the candidate link
	 */
	function fixLinkTails(& $parser, $key) {
		$k2 = $key +1;
		if (!isset ($this->children[$k2]))
			return;
		if (!is_string($this->children[$k2]))
			return;
		if (is_string($this->children[$key]))
			return;
		if ($this->children[$key]->name != "LINK")
			return;

		$n = $this->children[$k2];
		$s = '';
		# The umlauts are matched as whole (possibly multi-byte) sequences;
		# comparing them against the single byte $n[0] can never succeed.
		if (preg_match('/^(?:[a-z]|ä|ö|ü|ß)+/', $n, $m)) {
			$s = $m[0];
			# Cast keeps the child a string even where substr() returns false
			$n = (string) substr($n, strlen($s));
		}
		$this->children[$k2] = $n;

		if (count($this->children[$key]->children) > 1) {
			# The link already has options: extend the last one
			$kl = array_keys($this->children[$key]->children);
			$kl = array_pop($kl);
			$this->children[$key]->children[$kl]->children[] = $s;
		} else {
			# Only a target: synthesize a display title "target + tail"
			$e = new element;
			$e->name = "LINKOPTION";
			$t = $this->children[$key]->sub_makeXHTML($parser);
			$e->children[] = trim($t).$s;
			$this->children[$key]->children[] = $e;
		}
	}

	/**
	 * This function generates the XHTML for the entire subtree
	 *
	 * @param object $parser parser object, handed on to the children
	 * @param string $tag    XHTML tag to wrap the content in, '' for none
	 * @param string $attr   extra attribute string placed before the ones
	 *                       taken from an ATTRS child
	 * @return string
	 */
	function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
		$ret = '';

		$attr2 = $this->getSourceAttrs();
		if ($attr != '' AND $attr2 != '')
			$attr .= ' ';
		$attr .= $attr2;

		if ($tag != '') {
			$ret .= '<'.$tag;
			if ($attr != '')
				$ret .= ' '.$attr;
			$ret .= '>';
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		# foreach ( array_keys ( $this->children ) AS $x )
		# $this->fixLinkTails ( $parser , $x ) ;

		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= $child;
			} elseif ($child->name != 'ATTRS') {
				# ATTRS was already folded into the opening tag above
				$ret .= $child->makeXHTML($parser);
			}
		}
		if ($tag != '')
			$ret .= '</'.$tag.">\n";
		return $ret;
	}

	/**
	 * Link functions
	 *
	 * Renders an internal link. Targets in the image namespace are handed to
	 * $parser->makeImage() unless the FORCEDLINK attribute is 'yes';
	 * everything else goes through the skin's makeLink().
	 *
	 * @param object $parser        parser object providing makeImage()
	 * @param string $target        link target text
	 * @param string $display_title text to show for the link
	 * @param array  $options       remaining link options
	 * @return string
	 */
	function createInternalLink(& $parser, $target, $display_title, $options) {
		global $wgUser;
		$skin = $wgUser->getSkin();
		$tp = explode(':', $target); # tp = target parts
		$language = ''; # The language/meta/etc. part (not detected yet)
		$namespace = ''; # The namespace, if any (not detected yet)

		$nt = Title :: newFromText($target);
		$fl = strtoupper($this->getAttr('FORCEDLINK')) == 'YES';

		# There's stuff missing here...
		# is_object() guards against a target that yields no Title object.
		if (!$fl && count($tp) > 1 && is_object($nt) && $nt->getNamespace() == NS_IMAGE) {
			$options[] = $display_title;
			return $parser->makeImage($nt, implode('|', $options));
		}

		if ($language != '') {
			# External link within the WikiMedia project
			return "{language link}";
		}
		if ($namespace != '') {
			# Link to another namespace, check for image/media stuff
			return "{namespace link}";
		}
		return $skin->makeLink($target, $display_title);
	}

	/**
	 * Renders a LINK node: the LINKTARGET child supplies the target, every
	 * other element child is an option, and the last option is the display
	 * title (the target itself when there are no options).
	 *
	 * @param object $parser parser object
	 * @return string
	 */
	function makeInternalLink(& $parser) {
		$target = '';
		$option = array ();
		foreach ($this->children as $child) {
			if (is_string($child)) {
				# This shouldn't be the case!
				continue;
			}
			if ($child->name == 'LINKTARGET') {
				$target = trim($child->makeXHTML($parser));
			} else {
				$option[] = trim($child->makeXHTML($parser));
			}
		}

		if (count($option) == 0)
			$option[] = $target; # Create dummy display title
		$display_title = array_pop($option);
		return $this->createInternalLink($parser, $target, $display_title, $option);
	}

	/**
	 * Loads the text of a template page and renders it with
	 * $parser->plain_parse(), or returns a broken link when the page does
	 * not exist.
	 *
	 * NOTE(review): the result of Title::newFromText() is used unchecked;
	 * confirm it cannot be null for the titles reaching this point.
	 *
	 * @param string $title  template name; the template namespace is
	 *                       prepended when it contains no ':'
	 * @param array  $parts  template arguments, either "value" (numbered
	 *                       from 1) or "key=value"
	 * @param object $parser parser object providing plain_parse()
	 * @return string
	 */
	function getTemplateXHTML($title, $parts, & $parser) {
		global $wgLang, $wgUser;
		$skin = $wgUser->getSkin();
		$ot = $title; # Original title
		if (count(explode(':', $title)) == 1)
			$title = $wgLang->getNsText(NS_TEMPLATE).":".$title;
		$nt = Title :: newFromText($title);
		$id = $nt->getArticleID();
		if ($id == 0) {
			# No/non-existing page
			return $skin->makeBrokenLink($title, $ot);
		}

		$a = 0;
		$tv = array (); # Template variables
		foreach ($parts AS $part) {
			$a ++;
			$x = explode('=', $part, 2);
			if (count($x) == 1)
				$key = "{$a}";
			else
				$key = $x[0];
			$value = array_pop($x);
			$tv[$key] = $value;
		}
		$art = new Article($nt);
		$text = $art->getContent(false);
		$parser->plain_parse($text, true, $tv);

		return $text;
	}

	/**
	 * This function actually converts wikiXML into XHTML tags
	 *
	 * An EXTENSION node whose NAME is a known HTML tag is rendered like the
	 * corresponding wiki element. An ATTRS node returns its attribute string
	 * unwrapped; every other result is wrapped in newlines with doubled
	 * newlines collapsed once.
	 *
	 * @param object $parser parser object; its nowiki, mListType and
	 *                       mCurrentTemplateOptions members are used here
	 * @return string
	 */
	function makeXHTML(& $parser) {
		$ret = '';
		$n = $this->name; # Shortcut

		if ($n == 'EXTENSION') {
			# Fix allowed HTML
			$htmlTags = array (
				'B' => 'BOLD',
				'STRONG' => 'BOLD',
				'I' => 'ITALICS',
				'EM' => 'ITALICS',
				'U' => 'UNDERLINED', # Hey, virtual wiki tag! ;-)
				'S' => 'STRIKE',
				'P' => 'PARAGRAPH',
				'TABLE' => 'TABLE',
				'TR' => 'TABLEROW',
				'TD' => 'TABLECELL',
				'TH' => 'TABLEHEAD',
				'CAPTION' => 'CAPTION',
				'NOWIKI' => 'NOWIKI',
			);
			$ext = strtoupper($this->getAttr('NAME'));

			if (isset ($htmlTags[$ext])) {
				$n = $htmlTags[$ext];
				unset ($this->attrs['NAME']); # Cleanup
			} elseif (isset ($parser->nowiki) && $parser->nowiki > 0) {
				# No 'real' wiki tags allowed in nowiki section
				$n = '';
			}
		} // $n = 'EXTENSION'

		switch($n) {
			case 'ARTICLE':
				$ret .= $this->sub_makeXHTML($parser);
				break;
			case 'HEADING':
				$ret .= $this->sub_makeXHTML($parser, 'h'.$this->getAttr('LEVEL'));
				break;
			case 'PARAGRAPH':
				$ret .= $this->sub_makeXHTML($parser, 'p');
				break;
			case 'BOLD':
				$ret .= $this->sub_makeXHTML($parser, 'strong');
				break;
			case 'ITALICS':
				$ret .= $this->sub_makeXHTML($parser, 'em');
				break;

			# These don't exist as wiki markup
			case 'UNDERLINED':
				$ret .= $this->sub_makeXHTML($parser, 'u');
				break;
			case 'STRIKE':
				$ret .= $this->sub_makeXHTML($parser, 'strike');
				break;

			# HTML comment
			case 'COMMENT':
				# Comments are parsed out
				break;

			# Links
			case 'LINK':
				$ret .= $this->makeInternalLink($parser);
				break;
			case 'LINKTARGET':
			case 'LINKOPTION':
				$ret .= $this->sub_makeXHTML($parser);
				break;

			case 'TEMPLATE':
				$parts = $this->sub_makeXHTML($parser);
				$parts = explode('|', $parts);
				$title = array_shift($parts);
				# The callee declares $parser by reference itself; a
				# call-time "&" is a parse error since PHP 5.4.
				$ret .= $this->getTemplateXHTML($title, $parts, $parser);
				break;

			case 'TEMPLATEVAR':
				$x = $this->sub_makeXHTML($parser);
				if (isset ($parser->mCurrentTemplateOptions["{$x}"]))
					$ret .= $parser->mCurrentTemplateOptions["{$x}"];
				break;

			# Internal use, not generated by wiki2xml parser
			case 'IGNORE':
				$ret .= $this->sub_makeXHTML($parser);
				break; # without this the content was rendered again by the cases below

			case 'NOWIKI':
				if (!isset ($parser->nowiki))
					$parser->nowiki = 0;
				$parser->nowiki++;
				$ret .= $this->sub_makeXHTML($parser, '');
				$parser->nowiki--;
				break; # without this the content was rendered again as an unknown extension

			# Unknown HTML extension
			case 'EXTENSION': # This is currently a dummy!!!
				$ext = $this->getAttr('NAME');

				$ret .= '&lt;'.$ext.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$ext.'&gt; ';
				break;

			# Table stuff
			case 'TABLE':
				$ret .= $this->sub_makeXHTML($parser, 'table');
				break;
			case 'TABLEROW':
				$ret .= $this->sub_makeXHTML($parser, 'tr');
				break;
			case 'TABLECELL':
				$ret .= $this->sub_makeXHTML($parser, 'td');
				break;
			case 'TABLEHEAD':
				$ret .= $this->sub_makeXHTML($parser, 'th');
				break;
			case 'CAPTION':
				$ret .= $this->sub_makeXHTML($parser, 'caption');
				break;
			case 'ATTRS': # SPECIAL CASE : returning attributes
				return $this->getTheseAttrs();

			# Lists stuff
			case 'LISTITEM':
				if (isset ($parser->mListType) && $parser->mListType == 'dl')
					$ret .= $this->sub_makeXHTML($parser, 'dd');
				else
					$ret .= $this->sub_makeXHTML($parser, 'li');
				break;
			case 'LIST':
				$listKind = $this->getAttr('TYPE');
				$type = 'ol'; # Default
				if ($listKind == 'bullet')
					$type = 'ul';
				elseif ($listKind == 'indent')
					$type = 'dl';
				# Remember the enclosing list type so nested lists restore it
				$oldtype = isset ($parser->mListType) ? $parser->mListType : null;
				$parser->mListType = $type;
				$ret .= $this->sub_makeXHTML($parser, $type);
				$parser->mListType = $oldtype;
				break;

			# Something else entirely
			default:
				$ret .= '&lt;'.$n.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$n.'&gt; ';
		} // switch($n)

		$ret = "\n{$ret}\n";
		$ret = str_replace("\n\n", "\n", $ret);
		return $ret;
	}

	/**
	 * A function for additional debugging output
	 *
	 * @return string nested HTML list showing name, attributes and children
	 *                of this node and its subtree (values are not escaped)
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> $this->name </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ($this->attrs as $name => $value) {
			$ret .= "$name => $value; ";
		}
		$ret .= " </li>\n";
		// print children
		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= "<li> $child </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
432
// Stack of the elements that are currently open, outermost first
$ancStack = array ();

// START Three global functions needed for parsing, sorry guys
/**
 * XML parser callback for an opening tag: creates the matching element
 * and puts it on top of the ancestor stack.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $name   tag name
 * @param array  $attrs  attributes of the tag
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$node = new element;
	$node->attrs = $attrs;
	$node->name = $name;

	array_push($ancStack, $node);
}
446
/**
 * XML parser callback for a closing tag: takes the finished element off
 * the ancestor stack and either appends it to its parent's children or,
 * when the stack is now empty, stores it in the global $rootElem.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $name   tag name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	$depth = count($ancStack);

	if ($depth != 0) {
		// the new top of the stack is the parent of the finished element
		$ancStack[$depth - 1]->children[] = $finished;
		return;
	}

	$rootElem = $finished;
}
458
/**
 * XML parser callback for character data: appends the trimmed text to the
 * children of the innermost open element. Text that is empty after
 * trimming, or that arrives while no element is open, is dropped.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $data   the character data
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Blank lines are of no use, so surrounding whitespace goes away
	$text = trim($data);
	if ($text == "" || !$ancStack) {
		return;
	}

	$top = count($ancStack) - 1;
	$ancStack[$top]->children[] = $text;
}
// END Three global functions needed for parsing, sorry guys
469
/**
 * Here's the class that generates a nice tree
 *
 * Both scan methods run PHP's XML parser with the global wgXML* handlers,
 * which build the tree on the global $ancStack and leave the finished
 * root in the global $rootElem. Any XML error terminates the script via
 * die().
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class xml2php {

	/**
	 * Creates an XML parser wired to the wgXML* handler functions.
	 *
	 * @return mixed the new XML parser
	 */
	function createParser() {
		$xml_parser = xml_parser_create();
		xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
		return $xml_parser;
	}

	/**
	 * Builds the "XML error" message for the parser's current error state.
	 *
	 * @param mixed $xml_parser parser whose last xml_parse() call failed
	 * @return string
	 */
	function errorMessage($xml_parser) {
		return sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser));
	}

	/**
	 * Parses an XML file into an element tree.
	 *
	 * @param string $filename path of the XML file
	 * @return object root element of the tree
	 */
	function & scanFile($filename) {
		global $ancStack, $rootElem;
		$ancStack = array ();
		$rootElem = null; // never hand back the tree of an earlier parse

		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}
		$xml_parser = $this->createParser();

		// Read until a chunk is empty or hits end of file, so the parser
		// always receives exactly one call flagged as final. Looping on the
		// truthiness of the chunk would stop on a chunk of "0" and could
		// skip the final call altogether.
		do {
			$data = fread($fp, 4096);
			if ($data === false) {
				$data = '';
			}
			$final = ($data === '' || feof($fp));
			if (!xml_parse($xml_parser, $data, $final)) {
				$message = $this->errorMessage($xml_parser);
				fclose($fp);
				xml_parser_free($xml_parser);
				die($message);
			}
		} while (!$final);

		fclose($fp);
		xml_parser_free($xml_parser);

		// return the remaining root element we copied in the beginning
		return $rootElem;
	}

	/**
	 * Parses an XML string into an element tree.
	 *
	 * @param string $input the complete XML document
	 * @return object root element of the tree
	 */
	function scanString($input) {
		global $ancStack, $rootElem;
		$ancStack = array ();
		$rootElem = null; // never hand back the tree of an earlier parse

		$xml_parser = $this->createParser();

		if (!xml_parse($xml_parser, $input, true)) {
			$message = $this->errorMessage($xml_parser);
			xml_parser_free($xml_parser);
			die($message);
		}
		xml_parser_free($xml_parser);

		// return the remaining root element we copied in the beginning
		return $rootElem;
	}

}
518
/**
 * Experimental parser that sends the wikitext through the external
 * html2xml and wiki2xml command line tools, reads the resulting XML into
 * an element tree and renders that tree as XHTML.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class ParserXML extends Parser {
	/**#@+
	 * @private
	 */
	# Persistent:
	var $mTagHooks, $mListType;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType, $mTemplates, // cache of already loaded templates, avoids
	// multiple SQL queries for the same string
	$mTemplatePath; // stores an unsorted hash of all the templates already loaded
	// in this path. Used for loop detection.

	var $nowikicount, $mCurrentTemplateOptions;

	# Nesting depth of NOWIKI sections; element::makeXHTML() counts it up and down
	var $nowiki = 0;

	/**#@-*/

	/**
	 * Constructor
	 *
	 * @public
	 */
	function __construct() {
		$this->mTemplates = array ();
		$this->mTemplatePath = array ();
		$this->mTagHooks = array ();
		$this->clearState();
	}

	/**
	 * Old-style constructor, kept for callers and PHP versions that still
	 * rely on it; it only forwards to __construct().
	 *
	 * @public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * @private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array ();
		$this->mStripState = array ();
		$this->mArgStack = array ();
		$this->mInPre = false;
	}

	/**
	 * Runs an external command with $input on its standard input (fed from
	 * a temporary file) and returns what it printed, lines joined by "\n".
	 *
	 * @param string $command command line to execute, used as given
	 * @param string $input   data for the command's standard input
	 * @return string output of the command, '' when the temporary file
	 *                could not be created
	 * @private
	 */
	function runExternalFilter($command, $input) {
		$tmpfname = tempnam( wfTempDir(), 'FOO' );
		if ($tmpfname === false) {
			return '';
		}
		$handle = fopen($tmpfname, 'w');
		if (!$handle) {
			unlink($tmpfname);
			return '';
		}
		fwrite($handle, $input);
		fclose($handle);

		$lines = array ();
		# The temp dir may contain spaces or shell metacharacters
		exec($command.' < '.escapeshellarg($tmpfname), $lines);
		unlink($tmpfname);

		return implode("\n", $lines);
	}

	/**
	 * Sends the text through the external html2xml tool, which is looked
	 * up in the same directory as the $wgWiki2xml executable. The text is
	 * passed through utf8_encode() on the way in and utf8_decode() on the
	 * way out.
	 *
	 * @param string $text text to convert, replaced by the tool's output
	 */
	function html2xml(& $text) {
		global $wgWiki2xml;

		# generating html2xml command path
		$a = explode('/', $wgWiki2xml);
		array_pop($a);
		$a[] = 'html2xml';
		$html2xml = implode('/', $a);

		$text = utf8_decode($this->runExternalFilter($html2xml, utf8_encode($text)));
	}

	/**
	 * Turns the wikitext into XML by calling the external parser: first
	 * html2xml(), then the $wgWiki2xml executable.
	 *
	 * NOTE(review): unlike html2xml() the input is not passed through
	 * utf8_encode() before the call, only utf8_decode() afterwards —
	 * confirm this asymmetry is intended.
	 *
	 * @param string $text wikitext, replaced by the tool's XML output
	 */
	function runXMLparser(& $text) {
		global $wgWiki2xml;

		$this->html2xml($text);

		$text = utf8_decode($this->runExternalFilter($wgWiki2xml, $text));
	}

	/**
	 * Converts wikitext to XHTML in place. For debugging, the element tree
	 * can be dumped with its myPrint() method.
	 *
	 * @param string $text            wikitext, replaced by the XHTML
	 * @param bool   $inline          when true and the root has exactly one
	 *                                child element, that child is renamed to
	 *                                IGNORE so it is rendered without its own
	 *                                wrapping tag (used for templates)
	 * @param array  $templateOptions template arguments, available to
	 *                                TEMPLATEVAR nodes during this call
	 */
	function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
		$this->runXMLparser($text);
		$w = new xml2php;
		$result = $w->scanString($text);

		# This method is re-entered for templates, so the options of the
		# enclosing call are put back afterwards.
		$oldTemplateOptions = $this->mCurrentTemplateOptions;
		$this->mCurrentTemplateOptions = $templateOptions;

		if ($inline) { # Inline rendering off for templates
			if (count($result->children) == 1 && is_object($result->children[0]))
				$result->children[0]->name = 'IGNORE';
		}

		$text = $result->makeXHTML($this);
		$this->mCurrentTemplateOptions = $oldTemplateOptions;
	}

	/**
	 * Parses wikitext and returns the parser output object holding the
	 * resulting XHTML.
	 *
	 * @param string $text       wikitext
	 * @param object $title      not used by this implementation
	 * @param object $options    not used by this implementation
	 * @param bool   $linestart  not used by this implementation
	 * @param bool   $clearState when true the parser state is reset first,
	 *                           so every call returns its own output object
	 * @return object the ParserOutput held in $mOutput
	 */
	function parse($text, & $title, $options, $linestart = true, $clearState = true) {
		if ($clearState) {
			$this->clearState();
		}
		$this->plain_parse($text);
		$this->mOutput->setText($text);
		return $this->mOutput;
	}

}
643 ?>