b5dae85fb3a4b6862621238edaf521315b5c07a6
5 * @subpackage Experimental
9 * This should one day become the XML->(X)HTML parser
10 * Based on work by Jan Hidders and Magnus Manske
12 * $wgUseXMLparser = true ;
13 * $wgEnableParserCache = false ;
14 * $wgWiki2xml to the path and executable of the command line version (cli)
15 * in LocalSettings.php
17 * @subpackage Experimental
21 * the base class for an element
23 * @subpackage Experimental
27 var $attrs = array ();
28 var $children = array ();
/**
 * Finds the ATTRS child element and returns its ATTR sub-children as a
 * single attribute string.
 *
 * When several ATTRS children are present the last one wins, because each
 * match overwrites the previous result.
 *
 * @return string The attribute string, or '' when there is no ATTRS child.
 */
function getSourceAttrs() {
	$ret = '';
	foreach ($this->children as $child) {
		if (!is_string($child) AND $child->name == 'ATTRS') {
			# makeXHTML() answers an ATTRS element with getTheseAttrs().
			# Calling it directly avoids handing makeXHTML() a $parser
			# variable that was never defined in this method.
			$ret = $child->getTheseAttrs();
		}
	}
	return $ret;
}
/**
 * Collects the ATTR children of this element for getSourceAttrs().
 *
 * Every ATTR child becomes one name='value' pair; the pairs are joined
 * with single spaces.
 *
 * @return string The attribute string, or '' when there is no ATTR child.
 */
function getTheseAttrs() {
	# Byte-wise escaping table, so it works whatever encoding the text is in
	$unsafe = array ('&', '<', '>', '"', "'");
	$safe = array ('&amp;', '&lt;', '&gt;', '&quot;', '&#039;');

	$ret = array ();
	foreach ($this->children as $child) {
		if (is_string($child) OR $child->name != 'ATTR') {
			continue;
		}
		# The value may be missing (empty attribute) or may have been
		# delivered by the XML parser in several pieces, so gather every
		# text child instead of reading only the first one.
		$value = '';
		foreach ($child->children as $piece) {
			if (is_string($piece)) {
				$value .= $piece;
			}
		}
		# Escape the value so it cannot close the quote or the tag
		$ret[] = $child->attrs["NAME"]."='".str_replace($unsafe, $safe, $value)."'";
	}
	return implode(' ', $ret);
}
57 function fixLinkTails(& $parser, $key) {
59 if (!isset ($this->children
[$k2]))
61 if (!is_string($this->children
[$k2]))
63 if (is_string($this->children
[$key]))
65 if ($this->children
[$key]->name
!= "LINK")
68 $n = $this->children
[$k2];
70 while ($n != '' AND (($n[0] >= 'a' AND $n[0] <= 'z') OR $n[0] == 'ä' OR $n[0] == 'ö' OR $n[0] == 'ü' OR $n[0] == 'ß')) {
74 $this->children
[$k2] = $n;
76 if (count($this->children
[$key]->children
) > 1) {
77 $kl = array_keys($this->children
[$key]->children
);
79 $this->children
[$key]->children
[$kl]->children
[] = $s;
82 $e->name
= "LINKOPTION";
83 $t = $this->children
[$key]->sub_makeXHTML($parser);
84 $e->children
[] = trim($t).$s;
85 $this->children
[$key]->children
[] = $e;
90 * This function generates the XHTML for the entire subtree
92 function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
95 $attr2 = $this->getSourceAttrs();
96 if ($attr != '' AND $attr2 != '')
107 # THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
108 # foreach ( array_keys ( $this->children ) AS $x )
109 # $this->fixLinkTails ( $parser , $x ) ;
111 foreach ($this->children
as $child) {
112 if (is_string($child)) {
114 } elseif ($child->name
!= 'ATTRS') {
115 $ret .= $child->makeXHTML($parser);
119 $ret .= '</'.$tag.">\n";
126 function createInternalLink(& $parser, $target, $display_title, $options) {
128 $skin = $wgUser->getSkin();
129 $tp = explode(':', $target); # tp = target parts
130 $title = ''; # The plain title
131 $language = ''; # The language/meta/etc. part
132 $namespace = ''; # The namespace, if any
133 $subtarget = ''; # The '#' thingy
135 $nt = Title
:: newFromText($target);
136 $fl = strtoupper($this->attrs
['FORCEDLINK']) == 'YES';
138 if ($fl ||
count($tp) == 1) {
139 # Plain and simple case
142 # There's stuff missing here...
143 if ($nt->getNamespace() == NS_IMAGE
) {
144 $options[] = $display_title;
145 return $parser->makeImage($nt, implode('|', $options));
152 if ($language != '') {
153 # External link within the WikiMedia project
154 return "{language link}";
156 if ($namespace != '') {
157 # Link to another namespace, check for image/media stuff
158 return "{namespace link}";
160 return $skin->makeLink($target, $display_title);
165 /** @todo document */
166 function makeInternalLink(& $parser) {
169 foreach ($this->children
as $child) {
170 if (is_string($child)) {
171 # This shouldn't be the case!
173 if ($child->name
== 'LINKTARGET') {
174 $target = trim($child->makeXHTML($parser));
176 $option[] = trim($child->makeXHTML($parser));
181 if (count($option) == 0)
182 $option[] = $target; # Create dummy display title
183 $display_title = array_pop($option);
184 return $this->createInternalLink($parser, $target, $display_title, $option);
187 /** @todo document */
188 function getTemplateXHTML($title, $parts, & $parser) {
189 global $wgLang, $wgUser;
190 $skin = $wgUser->getSkin();
191 $ot = $title; # Original title
192 if (count(explode(':', $title)) == 1)
193 $title = $wgLang->getNsText(NS_TEMPLATE
).":".$title;
194 $nt = Title
:: newFromText($title);
195 $id = $nt->getArticleID();
197 # No/non-existing page
198 return $skin->makeBrokenLink($title, $ot);
202 $tv = array (); # Template variables
203 foreach ($parts AS $part) {
205 $x = explode('=', $part, 2);
210 $value = array_pop($x);
213 $art = new Article($nt);
214 $text = $art->getContent(false);
215 $parser->plain_parse($text, true, $tv);
221 * This function actually converts wikiXML into XHTML tags
222 * @todo use switch() !
224 function makeXHTML(& $parser) {
226 $n = $this->name
; # Shortcut
228 if ($n == 'EXTENSION') {
231 $ext = strtoupper($this->attrs
['NAME']);
243 $n = 'UNDERLINED'; # Hey, virtual wiki tag! ;-)
271 unset ($this->attrs
['NAME']); # Cleanup
272 } elseif ($parser->nowiki
> 0) {
273 # No 'real' wiki tags allowed in nowiki section
276 } // $n = 'EXTENSION'
280 $ret .= $this->sub_makeXHTML($parser);
283 $ret .= $this->sub_makeXHTML($parser, 'h'.$this->attrs
['LEVEL']);
286 $ret .= $this->sub_makeXHTML($parser, 'p');
289 $ret .= $this->sub_makeXHTML($parser, 'strong');
292 $ret .= $this->sub_makeXHTML($parser, 'em');
295 # These don't exist as wiki markup
297 $ret .= $this->sub_makeXHTML($parser, 'u');
300 $ret .= $this->sub_makeXHTML($parser, 'strike');
305 # Comments are parsed out
312 $ret .= $this->makeInternalLink($parser);
316 $ret .= $this->sub_makeXHTML($parser);
320 $parts = $this->sub_makeXHTML($parser);
321 $parts = explode('|', $parts);
322 $title = array_shift($parts);
323 $ret .= $this->getTemplateXHTML($title, $parts, & $parser);
327 $x = $this->sub_makeXHTML($parser);
328 if (isset ($parser->mCurrentTemplateOptions
["{$x}"]))
329 $ret .= $parser->mCurrentTemplateOptions
["{$x}"];
332 # Internal use, not generated by wiki2xml parser
334 $ret .= $this->sub_makeXHTML($parser);
338 $ret .= $this->sub_makeXHTML($parser, '');
342 # Unknown HTML extension
343 case 'EXTENSION': # This is currently a dummy!!!
344 $ext = $this->attrs
['NAME'];
346 $ret .= '<'.$ext.'>';
347 $ret .= $this->sub_makeXHTML($parser);
348 $ret .= '</'.$ext.'> ';
355 $ret .= $this->sub_makeXHTML($parser, 'table');
358 $ret .= $this->sub_makeXHTML($parser, 'tr');
361 $ret .= $this->sub_makeXHTML($parser, 'td');
364 $ret .= $this->sub_makeXHTML($parser, 'th');
367 $ret .= $this->sub_makeXHTML($parser, 'caption');
369 case 'ATTRS': # SPECIAL CASE : returning attributes
370 return $this->getTheseAttrs();
375 if ($parser->mListType
== 'dl')
376 $ret .= $this->sub_makeXHTML($parser, 'dd');
378 $ret .= $this->sub_makeXHTML($parser, 'li');
381 $type = 'ol'; # Default
382 if ($this->attrs
['TYPE'] == 'bullet')
385 if ($this->attrs
['TYPE'] == 'indent')
387 $oldtype = $parser->mListType
;
388 $parser->mListType
= $type;
389 $ret .= $this->sub_makeXHTML($parser, $type);
390 $parser->mListType
= $oldtype;
393 # Something else entirely
395 $ret .= '<'.$n.'>';
396 $ret .= $this->sub_makeXHTML($parser);
397 $ret .= '</'.$n.'> ';
401 $ret = str_replace("\n\n", "\n", $ret);
406 * A function for additional debugging output
410 $ret .= "<li> <b> Name: </b> $this->name </li>\n";
412 $ret .= '<li> <b> Attributes: </b>';
413 foreach ($this->attrs
as $name => $value) {
414 $ret .= "$name => $value; ";
418 foreach ($this->children
as $child) {
419 if (is_string($child)) {
420 $ret .= "<li> $child </li>\n";
422 $ret .= $child->myPrint();
430 $ancStack = array (); // the stack with ancestral elements
432 // START Three global functions needed for parsing, sorry guys
/**
 * Start-tag callback for the XML parser (registered by scanFile() and
 * scanString()).
 *
 * Creates an element for the opened tag and puts it on top of the global
 * ancestor stack.
 *
 * @param mixed  $parser The XML parser handle (unused)
 * @param string $name   Tag name
 * @param array  $attrs  Tag attributes
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$opened = new element;
	$opened->attrs = $attrs;
	$opened->name = $name;

	array_push($ancStack, $opened);
}
/**
 * End-tag callback for the XML parser (registered by scanFile() and
 * scanString()).
 *
 * Takes the finished element off the global ancestor stack. It is appended
 * to the children of the element below it, or stored in the global
 * $rootElem when the stack has become empty.
 *
 * @param mixed  $parser The XML parser handle (unused)
 * @param string $name   Tag name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop($ancStack);
	if (count($ancStack) != 0) {
		// there is still a parent: hand the element over to it
		array_push($ancStack[count($ancStack) - 1]->children, $finished);
		return;
	}
	// nothing left on the stack, so this was the root
	$rootElem = $finished;
}
/**
 * Character-data callback for the XML parser (registered by scanFile() and
 * scanString()).
 *
 * Trims the text and, unless it is empty or there is no open element,
 * appends it to the children of the element on top of the global ancestor
 * stack.
 *
 * @param mixed  $parser The XML parser handle (unused)
 * @param string $data   Text delivered by the parser
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	$text = trim($data);
	if (!$ancStack || $text == "") {
		// no parent to attach to, or only whitespace: drop it
		return;
	}
	array_push($ancStack[count($ancStack) - 1]->children, $text);
}
465 // END Three global functions needed for parsing, sorry guys
468 * Here's the class that generates a nice tree
470 * @subpackage Experimental
/**
 * Parses an XML file into a tree of element objects.
 *
 * The file is fed to the XML parser in 4096-byte chunks; the registered
 * wgXML* callbacks build the tree through the globals $ancStack and
 * $rootElem. The script is terminated with die() when the file cannot be
 * opened or the XML is not well-formed.
 *
 * @param string $filename Path of the XML file to read
 * @return mixed Reference to the global $rootElem as left by the callbacks
 */
function & scanFile($filename) {
	global $ancStack, $rootElem;
	$ancStack = array ();

	$xml_parser = xml_parser_create();
	xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
	xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
	if (!($fp = fopen($filename, 'r'))) {
		die('could not open XML input');
	}
	# Always run the loop at least once and keep going until the parser has
	# been given its final chunk. Testing the chunk itself for truth would
	# stop early on a chunk of "0" and would never finish the parse for an
	# empty file or one whose size is a multiple of the chunk size.
	do {
		$data = fread($fp, 4096);
		if ($data === false) {
			# Read error: stop reading and let the parser judge what it got
			$data = '';
			$final = true;
		} else {
			$final = feof($fp);
		}
		if (!xml_parse($xml_parser, $data, $final)) {
			die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
		}
	} while (!$final);
	fclose($fp); # do not leak the file handle
	xml_parser_free($xml_parser);

	// return the root element stored by wgXMLendElement()
	return $rootElem;
}
/**
 * Parses an XML string into a tree of element objects.
 *
 * The whole string is handed to the XML parser in one go; the registered
 * wgXML* callbacks build the tree through the globals $ancStack and
 * $rootElem. The script is terminated with die() when the XML is not
 * well-formed.
 *
 * @param string $input The XML text
 * @return mixed The global $rootElem as left by the callbacks
 */
function scanString($input) {
	global $ancStack, $rootElem;
	$ancStack = array ();

	$xml_parser = xml_parser_create();
	xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
	xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');

	$parsed = xml_parse($xml_parser, $input, true);
	if (!$parsed) {
		$reason = xml_error_string(xml_get_error_code($xml_parser));
		$line = xml_get_current_line_number($xml_parser);
		die(sprintf("XML error: %s at line %d", $reason, $line));
	}
	xml_parser_free($xml_parser);

	// hand back the root element stored by wgXMLendElement()
	return $rootElem;
}
519 * @subpackage Experimental
521 class ParserXML
extends Parser
{
526 var $mTagHooks, $mListType;
528 # Cleared with clearState():
529 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
530 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
533 var $mOptions, $mTitle, $mOutputType, $mTemplates, // cache of already loaded templates, avoids
534 // multiple SQL queries for the same string
535 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
536 // in this path. Used for loop detection.
538 var $nowikicount, $mCurrentTemplateOptions;
547 function ParserXML() {
548 $this->mTemplates
= array ();
549 $this->mTemplatePath
= array ();
550 $this->mTagHooks
= array ();
/**
 * Puts the per-parse member variables back to their starting values.
 */
function clearState() {
	# Fresh output container
	$this->mOutput = new ParserOutput;

	# Counter and section marker
	$this->mAutonumber = 0;
	$this->mLastSection = "";

	# Flags start out switched off
	foreach (array ('mDTopen', 'mVariables', 'mInPre') as $flag) {
		$this->$flag = false;
	}

	# Collections start out empty
	foreach (array ('mIncludeCount', 'mStripState', 'mArgStack') as $list) {
		$this->$list = array ();
	}
}
572 * Turns the wikitext into XML by calling the external parser
575 function html2xml(& $text) {
578 # generating html2xml command path
580 $a = explode('/', $a);
583 $html2xml = implode('/', $a);
586 $tmpfname = tempnam( wfTempDir(), 'FOO' );
587 $handle = fopen($tmpfname, 'w');
588 fwrite($handle, utf8_encode($text));
590 exec($html2xml.' < '.$tmpfname, $a);
591 $text = utf8_decode(implode("\n", $a));
595 /** @todo document */
596 function runXMLparser(& $text) {
599 $this->html2xml($text);
601 $tmpfname = tempnam( wfTempDir(), 'FOO');
602 $handle = fopen($tmpfname, 'w');
603 fwrite($handle, $text);
605 exec($wgWiki2xml.' < '.$tmpfname, $a);
606 $text = utf8_decode(implode("\n", $a));
610 /** @todo document */
611 function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
612 $this->runXMLparser($text);
615 $result = $w->scanString($text);
617 $oldTemplateOptions = $this->mCurrentTemplateOptions
;
618 $this->mCurrentTemplateOptions
= $templateOptions;
620 if ($inline) { # Inline rendering off for templates
621 if (count($result->children
) == 1)
622 $result->children
[0]->name
= 'IGNORE';
626 $text = $result->makeXHTML($this); # No debugging info
628 $text = $result->makeXHTML($this).'<hr>'.$text.'<hr>'.$result->myPrint();
629 $this->mCurrentTemplateOptions
= $oldTemplateOptions;
632 /** @todo document */
633 function parse($text, & $title, $options, $linestart = true, $clearState = true) {
634 $this->plain_parse($text);
635 $this->mOutput
->setText($text);
636 return $this->mOutput
;