indent lists
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 require_once ( "Parser.php" ) ;
3
4 /**
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
7 * To use, set
8 * $wgUseXMLparser = true ;
9 * $wgEnableParserCache = false ;
10 * $wgWiki2xml to the path and executable of the command line version (cli)
11 * in LocalSettings.php
12 * @package MediaWiki
13 * @subpackage Experimental
14 */
15
/**
 * One node of the wikiXML tree built by the wgXML* parser callbacks.
 *
 * $name     upper-case element name as delivered by the XML parser
 * $attrs    associative array of the element's XML attributes
 * $children document-order list whose entries are either plain strings
 *           (character data) or nested element objects
 */
class element {
    var $name = '';
    var $attrs = array();
    var $children = array();

    /**
     * Read an XML attribute of this node without tripping over a missing key.
     *
     * @param string $key     attribute name
     * @param string $default value returned when the attribute is absent
     * @return string
     */
    function getAttr( $key, $default = '' ) {
        return isset( $this->attrs[$key] ) ? $this->attrs[$key] : $default;
    }

    /**
     * Find the ATTRS child and return its ATTR sub-children as a single
     * string of HTML attributes. If several ATTRS children exist the last
     * one wins; without any, the result is an empty string.
     *
     * @return string
     */
    function getSourceAttrs() {
        $ret = '';
        foreach ( $this->children as $child ) {
            if ( !is_string( $child ) && $child->name == 'ATTRS' ) {
                # makeXHTML() on an ATTRS node does nothing but delegate to
                # getTheseAttrs(), so call it directly; no parser is needed.
                $ret = $child->getTheseAttrs();
            }
        }
        return $ret;
    }

    /**
     * Collect the ATTR children of this (ATTRS) node as name='value' pairs
     * joined by single spaces.
     *
     * Values are escaped for use inside a single-quoted HTML attribute
     * (existing entities are left alone), an empty value yields name='',
     * and ATTR nodes whose NAME is missing or not a plain attribute name
     * are skipped.
     *
     * NOTE(review): attribute names are not checked against a whitelist, so
     * event-handler attributes would pass through here - confirm whether the
     * wiki2xml side filters them.
     *
     * @return string
     */
    function getTheseAttrs() {
        $pairs = array();
        foreach ( $this->children as $child ) {
            if ( is_string( $child ) || $child->name != 'ATTR' ) {
                continue;
            }
            $attrName = $child->getAttr( 'NAME' );
            if ( !preg_match( '/^[A-Za-z_:][-A-Za-z0-9_:.]*$/', $attrName ) ) {
                continue;
            }
            # The value may be stored as several text pieces; use all of them.
            $value = '';
            foreach ( $child->children as $piece ) {
                if ( is_string( $piece ) ) {
                    $value .= $piece;
                }
            }
            $pairs[] = $attrName . "='" . htmlspecialchars( $value, ENT_QUOTES, 'UTF-8', false ) . "'";
        }
        return implode( ' ', $pairs );
    }

    /**
     * Generate the XHTML for the entire subtree below this node.
     *
     * @param object $parser passed on to the children's makeXHTML()
     * @param string $tag    HTML tag to wrap the output in; "" for no wrapper
     * @param string $attr   attributes to put on $tag ahead of those found
     *                       in the node's ATTRS child
     * @return string
     */
    function sub_makeXHTML( &$parser, $tag = "", $attr = "" ) {
        $ret = '';

        $sourceAttrs = $this->getSourceAttrs();
        if ( $attr != '' && $sourceAttrs != '' ) {
            $attr .= ' ';
        }
        $attr .= $sourceAttrs;

        if ( $tag != '' ) {
            $ret .= '<' . $tag;
            if ( $attr != '' ) {
                $ret .= ' ' . $attr;
            }
            $ret .= '>';
        }

        # NOTE(review): text children are emitted verbatim, without HTML
        # escaping - confirm that this is safe for the XML fed in here.
        foreach ( $this->children as $child ) {
            if ( is_string( $child ) ) {
                $ret .= $child;
            } else if ( $child->name != 'ATTRS' ) {
                $ret .= $child->makeXHTML( $parser );
            }
        }

        if ( $tag != '' ) {
            $ret .= '</' . $tag . ">\n";
        }
        return $ret;
    }

    /**
     * Build the HTML for an internal link.
     *
     * A target that contains a ":" and resolves to the image namespace is
     * rendered through the skin's makeImageLinkObj(), unless the node carries
     * FORCEDLINK="yes"; everything else goes through the skin's makeLink().
     * Interlanguage and other namespace-specific handling is not implemented.
     *
     * @param object $parser        unused here
     * @param string $target        link target text
     * @param string $display_title text to show for the link
     * @param array  $options       remaining link options
     * @return string
     */
    function createInternalLink( &$parser, $target, $display_title, $options ) {
        global $wgUser;
        $skin = $wgUser->getSkin();

        $nt = Title::newFromText( $target );
        $forced = strtoupper( $this->getAttr( 'FORCEDLINK' ) ) == 'YES';
        $hasPrefix = strpos( $target, ':' ) !== false;

        # Only dereference the title if one was actually created.
        if ( !$forced && $hasPrefix && is_object( $nt ) && $nt->getNamespace() == NS_IMAGE ) {
            $options[] = $display_title;
            return $skin->makeImageLinkObj( $nt, implode( '|', $options ) );
        }

        return $skin->makeLink( $target, $display_title );
    }

    /**
     * Render a LINK node: the LINKTARGET child supplies the target, every
     * other element child is an option, and the last option serves as the
     * display title (the target itself when there are no options).
     *
     * @param object $parser
     * @return string
     */
    function makeInternalLink( &$parser ) {
        $target = '';
        $option = array();
        foreach ( $this->children as $child ) {
            if ( is_string( $child ) ) {
                # Stray text directly inside LINK is ignored.
                continue;
            }
            $rendered = trim( $child->makeXHTML( $parser ) );
            if ( $child->name == 'LINKTARGET' ) {
                $target = $rendered;
            } else {
                $option[] = $rendered;
            }
        }

        if ( count( $option ) == 0 ) {
            $option[] = $target; # Create dummy display title
        }
        $display_title = array_pop( $option );
        return $this->createInternalLink( $parser, $target, $display_title, $option );
    }

    /**
     * Expand a template call.
     *
     * $title gets the template namespace prefix unless it already contains a
     * ":". A title that cannot be turned into a Title object is returned as
     * escaped literal "{{...}}" text, a non-existing page becomes a broken
     * link, and otherwise the page text is run through
     * $parser->plain_parse() with the template variables and returned.
     *
     * Each entry of $parts is either "key=value" or a bare value; bare
     * values are keyed by their 1-based position among all parts.
     *
     * @param string $title  template name as written in the source
     * @param array  $parts  the "|"-separated arguments after the name
     * @param object $parser parser used to render the template text
     * @return string
     */
    function getTemplateXHTML( $title, $parts, &$parser ) {
        global $wgLang, $wgUser;
        $skin = $wgUser->getSkin();
        $ot = $title; # Original title
        if ( strpos( $title, ':' ) === false ) {
            $title = $wgLang->getNsText( NS_TEMPLATE ) . ':' . $title;
        }

        $nt = Title::newFromText( $title );
        if ( !is_object( $nt ) ) {
            # Unusable title: show the call instead of crashing on it.
            return htmlspecialchars( '{{' . $ot . '}}' );
        }
        if ( $nt->getArticleID() == 0 ) { # No/non-existing page
            return $skin->makeBrokenLink( $title, $ot );
        }

        $tv = array(); # Template variables
        $position = 0;
        foreach ( $parts as $part ) {
            $position++;
            $pair = explode( '=', $part, 2 );
            if ( count( $pair ) == 1 ) {
                $tv["{$position}"] = $pair[0];
            } else {
                $tv[$pair[0]] = $pair[1];
            }
        }

        $art = new Article( $nt );
        $text = $art->getContent( false );
        $parser->plain_parse( $text, true, $tv );

        return $text;
    }

    /**
     * Convert this wikiXML node, including its subtree, into XHTML.
     *
     * EXTENSION nodes naming a known HTML tag are treated like the matching
     * wiki element. An ATTRS node is the one special case: it returns the
     * bare attribute string. All other output is wrapped in newlines, with
     * doubled newlines collapsed in a single pass.
     *
     * @param object $parser carries the nowiki counter, the current list type
     *                       and the current template variables
     * @return string
     */
    function makeXHTML( &$parser ) {
        $ret = '';
        $n = $this->name; # Shortcut

        if ( $n == 'EXTENSION' ) { # Fix allowed HTML
            $htmlToWiki = array(
                'B' => 'BOLD', 'STRONG' => 'BOLD',
                'I' => 'ITALICS', 'EM' => 'ITALICS',
                'U' => 'UNDERLINED',
                'S' => 'STRIKE',
                'P' => 'PARAGRAPH',
                'TABLE' => 'TABLE',
                'TR' => 'TABLEROW',
                'TD' => 'TABLECELL',
                'TH' => 'TABLEHEAD',
                'CAPTION' => 'CAPTION',
                'NOWIKI' => 'NOWIKI',
            );
            $ext = strtoupper( $this->getAttr( 'NAME' ) );
            if ( isset( $htmlToWiki[$ext] ) ) {
                $n = $htmlToWiki[$ext];
                unset( $this->attrs['NAME'] ); # Cleanup
            } else if ( isset( $parser->nowiki ) && $parser->nowiki > 0 ) {
                $n = ''; # No "real" wiki tags allowed in nowiki section
            }
        }

        # Elements that only wrap their content in a fixed tag ("" = no tag)
        $plainTags = array(
            'ARTICLE' => '',
            'PARAGRAPH' => 'p',
            'BOLD' => 'strong',
            'ITALICS' => 'em',
            'UNDERLINED' => 'u',      # These two don't exist as wiki markup
            'STRIKE' => 'strike',
            'LINKTARGET' => '',
            'LINKOPTION' => '',
            'IGNORE' => '',           # Internal use, not generated by wiki2xml parser
            'TABLE' => 'table',
            'TABLEROW' => 'tr',
            'TABLECELL' => 'td',
            'TABLEHEAD' => 'th',
            'CAPTION' => 'caption',
        );

        if ( $n == 'ATTRS' ) { # SPECIAL CASE : returning attributes
            return $this->getTheseAttrs();
        }

        if ( isset( $plainTags[$n] ) ) {
            $ret .= $this->sub_makeXHTML( $parser, $plainTags[$n] );
        } else if ( $n == 'HEADING' ) {
            # LEVEL ends up in the tag name, so force it into 1..6.
            $level = max( 1, min( 6, (int)$this->getAttr( 'LEVEL', '1' ) ) );
            $ret .= $this->sub_makeXHTML( $parser, 'h' . $level );
        } else if ( $n == 'COMMENT' ) {
            $ret .= ''; # Comments are parsed out
        } else if ( $n == 'LINK' ) {
            $ret .= $this->makeInternalLink( $parser );
        } else if ( $n == 'TEMPLATE' ) {
            $parts = explode( '|', $this->sub_makeXHTML( $parser ) );
            $title = array_shift( $parts );
            # The reference is declared in getTemplateXHTML()'s signature;
            # repeating "&" at the call site is a fatal error since PHP 5.4.
            $ret .= $this->getTemplateXHTML( $title, $parts, $parser );
        } else if ( $n == 'TEMPLATEVAR' ) {
            $x = $this->sub_makeXHTML( $parser );
            if ( isset( $parser->mCurrentTemplateOptions["{$x}"] ) ) {
                $ret .= $parser->mCurrentTemplateOptions["{$x}"];
            }
        } else if ( $n == 'NOWIKI' ) {
            if ( !isset( $parser->nowiki ) ) {
                $parser->nowiki = 0;
            }
            $parser->nowiki++;
            $ret .= $this->sub_makeXHTML( $parser, '' );
            $parser->nowiki--;
        } else if ( $n == 'EXTENSION' ) { # Unknown HTML extension; currently a dummy!!!
            $ext = htmlspecialchars( $this->getAttr( 'NAME' ) );
            $ret .= '&lt;' . $ext . '&gt;';
            $ret .= $this->sub_makeXHTML( $parser );
            $ret .= '&lt;/' . $ext . '&gt; ';
        } else if ( $n == 'LISTITEM' ) {
            $inDefinitionList = isset( $parser->mListType ) && $parser->mListType == 'dl';
            $ret .= $this->sub_makeXHTML( $parser, $inDefinitionList ? 'dd' : 'li' );
        } else if ( $n == 'LIST' ) {
            $listKind = $this->getAttr( 'TYPE' );
            $type = 'ol'; # Default
            if ( $listKind == 'bullet' ) {
                $type = 'ul';
            } else if ( $listKind == 'indent' ) {
                $type = 'dl';
            }
            # Children consult mListType; restore it for enclosing lists.
            $oldtype = isset( $parser->mListType ) ? $parser->mListType : null;
            $parser->mListType = $type;
            $ret .= $this->sub_makeXHTML( $parser, $type );
            $parser->mListType = $oldtype;
        } else { # Something else entirely
            $ret .= '&lt;' . $n . '&gt;';
            $ret .= $this->sub_makeXHTML( $parser );
            $ret .= '&lt;/' . $n . '&gt; ';
        }

        $ret = "\n{$ret}\n";
        $ret = str_replace( "\n\n", "\n", $ret );
        return $ret;
    }

    /**
     * Debugging aid: dump this node and its subtree as nested HTML lists.
     *
     * @return string
     */
    function myPrint() {
        $ret = "<ul>\n";
        $ret .= "<li> <b> Name: </b> $this->name </li>\n";
        // print attributes
        $ret .= '<li> <b> Attributes: </b>';
        foreach ( $this->attrs as $name => $value ) {
            $ret .= "$name => $value; ";
        }
        $ret .= " </li>\n";
        // print children
        foreach ( $this->children as $child ) {
            if ( is_string( $child ) ) {
                $ret .= "<li> $child </li>\n";
            } else {
                $ret .= $child->myPrint();
            }
        }
        $ret .= "</ul>\n";
        return $ret;
    }
}
348
$ancStack = array(); // stack of currently open (ancestor) elements; the wgXML* callbacks below access it via "global $ancStack"
350
351 // Three global functions needed for parsing, sorry guys
/**
 * Start-tag callback for the XML parser: creates an element node for the
 * tag that was just opened and puts it on top of the global ancestor stack.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $name   element name
 * @param array  $attrs  the element's attributes
 */
function wgXMLstartElement( $parser, $name, $attrs ) {
    global $ancStack;

    $opened = new element;
    $opened->attrs = $attrs;
    $opened->name = $name;

    array_push( $ancStack, $opened );
}
361
/**
 * End-tag callback for the XML parser: takes the finished element off the
 * global ancestor stack and appends it to the children of the element that
 * is now on top. When nothing is left on the stack, the finished element is
 * stored in the global $rootElem instead.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $name   element name (unused)
 */
function wgXMLendElement( $parser, $name ) {
    global $ancStack, $rootElem;

    $finished = array_pop( $ancStack );
    $depth = count( $ancStack );

    if ( $depth != 0 ) {
        // still inside a parent: hand the finished element over to it
        array_push( $ancStack[$depth - 1]->children, $finished );
        return;
    }

    $rootElem = $finished;
}
372
/**
 * Character-data callback for the XML parser: trims the text and, unless
 * nothing remains or no element is open, appends it as a string child of
 * the element on top of the global ancestor stack.
 *
 * NOTE(review): the XML extension may hand over one run of text in several
 * pieces; since every piece is trimmed on its own, whitespace at the piece
 * boundaries would be lost - confirm whether that matters for wiki2xml output.
 *
 * @param mixed  $parser the XML parser invoking the callback (unused)
 * @param string $data   the text that was read
 */
function wgXMLcharacterData( $parser, $data ) {
    global $ancStack;

    $text = trim( $data ); // blank pieces carry no information
    if ( !$ancStack || $text === "" ) {
        return;
    }

    $top = count( $ancStack ) - 1;
    array_push( $ancStack[$top]->children, $text );
}
381
382
/**
 * Turns XML into a tree of element objects.
 *
 * The tree is assembled by the global wgXMLstartElement(), wgXMLendElement()
 * and wgXMLcharacterData() callbacks, which communicate through the globals
 * $ancStack and $rootElem. Malformed XML ends the script through die().
 */
class xml2php {

    /**
     * Reset the shared parse state and create an XML parser wired to the
     * wgXML* callbacks.
     *
     * $rootElem is cleared as well, so the root of an earlier document can
     * never be returned for a later one.
     *
     * @return mixed the new XML parser
     */
    function newParser() {
        global $ancStack, $rootElem;
        $ancStack = array();
        $rootElem = null;

        $xml_parser = xml_parser_create();
        xml_set_element_handler( $xml_parser, 'wgXMLstartElement', 'wgXMLendElement' );
        xml_set_character_data_handler( $xml_parser, 'wgXMLcharacterData' );
        return $xml_parser;
    }

    /**
     * Feed one piece of input to the parser; die() with the parser's error
     * message and line number if it is rejected.
     *
     * @param mixed  $xml_parser parser from newParser()
     * @param string $data       next piece of the document
     * @param bool   $final      true when this is the last piece
     */
    function feed( $xml_parser, $data, $final ) {
        if ( !xml_parse( $xml_parser, $data, $final ) ) {
            die( sprintf( "XML error: %s at line %d",
                xml_error_string( xml_get_error_code( $xml_parser ) ),
                xml_get_current_line_number( $xml_parser ) ) );
        }
    }

    /**
     * Parse the XML file $filename, reading it in 4096-byte pieces.
     *
     * @param string $filename path of the XML file
     * @return object root element of the document
     */
    function &scanFile( $filename ) {
        global $rootElem;

        if ( !( $fp = fopen( $filename, 'r' ) ) ) {
            die( 'could not open XML input' );
        }
        $xml_parser = $this->newParser();

        # Keep reading until EOF has been reported and handed to the parser
        # as the final piece; a plain "while ( $data = fread() )" loop stops
        # at the first empty read, without ever telling the parser that the
        # document has ended.
        do {
            $data = fread( $fp, 4096 );
            if ( $data === false ) {
                fclose( $fp );
                die( 'could not read XML input' );
            }
            $final = feof( $fp );
            $this->feed( $xml_parser, $data, $final );
        } while ( !$final );

        fclose( $fp );
        xml_parser_free( $xml_parser );

        // the root element was stored by wgXMLendElement()
        return $rootElem;
    }

    /**
     * Parse a complete XML document held in a string.
     *
     * @param string $input the XML text
     * @return object root element of the document
     */
    function scanString( $input ) {
        global $rootElem;

        $xml_parser = $this->newParser();
        $this->feed( $xml_parser, $input, true );
        xml_parser_free( $xml_parser );

        // the root element was stored by wgXMLendElement()
        return $rootElem;
    }

}
431
/**
 * Parser variant that hands the wikitext to the external wiki2xml program
 * and renders the resulting XML tree to XHTML through the element class.
 */
class ParserXML EXTENDS Parser
{
    /**#@+
     * @access private
     */
    # Persistent:
    var $mTagHooks, $mListType;

    # Cleared with clearState():
    var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
    var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

    # Temporary:
    var $mOptions, $mTitle, $mOutputType,
        $mTemplates,    // cache of already loaded templates, avoids
                        // multiple SQL queries for the same string
        $mTemplatePath; // stores an unsorted hash of all the templates already loaded
                        // in this path. Used for loop detection.

    var $nowikicount , $mCurrentTemplateOptions ;

    # Nesting depth of NOWIKI sections; element::makeXHTML() counts it up and
    # down while rendering.
    var $nowiki = 0;

    /**#@-*/

    /**
     * Constructor
     *
     * @access public
     */
    function __construct() {
        $this->mTemplates = array();
        $this->mTemplatePath = array();
        $this->mTagHooks = array();
        $this->clearState();
    }

    /**
     * Old-style constructor name. PHP versions that no longer treat a method
     * named after the class as the constructor use __construct() above; this
     * stays for callers and PHP versions relying on the old name.
     *
     * @access public
     */
    function ParserXML() {
        $this->__construct();
    }

    /**
     * Clear Parser state
     *
     * @access private
     */
    function clearState() {
        $this->mOutput = new ParserOutput;
        $this->mAutonumber = 0;
        $this->mLastSection = "";
        $this->mDTopen = false;
        $this->mVariables = false;
        $this->mIncludeCount = array();
        $this->mStripState = array();
        $this->mArgStack = array();
        $this->mInPre = false;
        $this->nowiki = 0;
    }

    /**
     * Turns the wikitext into XML by calling the external parser.
     *
     * The text is written to a temporary file that is fed to the $wgWiki2xml
     * command on standard input; the command's output lines, joined with
     * newlines, replace $text. The temporary file is removed afterwards.
     * Failing to create or open the temporary file ends the script.
     *
     * @param string $text wikitext in, XML out
     */
    function runXMLparser ( &$text ) {
        global $wgWiki2xml ;

        $tmpfname = tempnam( "/tmp", "FOO" );
        if ( $tmpfname === false ) {
            die( 'could not create temporary file for wiki2xml' );
        }
        $handle = fopen( $tmpfname, "w" );
        if ( !$handle ) {
            unlink( $tmpfname );
            die( 'could not open temporary file for wiki2xml' );
        }
        fwrite( $handle, $text );
        fclose( $handle );

        # $wgWiki2xml is configuration and used as is; the file name is
        # quoted so that unusual temp paths cannot break the command line.
        $lines = array();
        exec( $wgWiki2xml . " < " . escapeshellarg( $tmpfname ), $lines );
        unlink( $tmpfname );

        $text = implode( "\n", $lines );
    }

    /**
     * Convert wikitext to XHTML in place.
     *
     * @param string $text            wikitext in, XHTML out
     * @param bool   $inline          when true and the document root has exactly
     *                                one element child, that child is renamed to
     *                                IGNORE so that only its content is rendered
     *                                (used for templates)
     * @param array  $templateOptions template variables visible to TEMPLATEVAR
     *                                nodes during this call; the previous set is
     *                                restored afterwards
     */
    function plain_parse ( &$text , $inline = false , $templateOptions = array () ) {
        $this->runXMLparser( $text );
        $w = new xml2php;
        $result = $w->scanString( $text );

        $oldTemplateOptions = $this->mCurrentTemplateOptions;
        $this->mCurrentTemplateOptions = $templateOptions;

        # Inline rendering off for templates. The sole child may also be a
        # plain string, which has no name to change.
        if ( $inline && count( $result->children ) == 1 && is_object( $result->children[0] ) ) {
            $result->children[0]->name = "IGNORE";
        }

        # For debugging, $result->myPrint() gives a dump of the tree.
        $text = $result->makeXHTML( $this );

        $this->mCurrentTemplateOptions = $oldTemplateOptions;
    }

    /**
     * Parse wikitext and return the ParserOutput holding the XHTML.
     *
     * $title, $options and $linestart are accepted for interface
     * compatibility but are not used by this implementation.
     *
     * @param string $text       wikitext
     * @param object $title      unused
     * @param object $options    unused
     * @param bool   $linestart  unused
     * @param bool   $clearState start from a fresh state (and a fresh
     *                           ParserOutput) instead of reusing the previous one
     * @return object the ParserOutput
     */
    function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
        if ( $clearState ) {
            $this->clearState();
        }
        $this->plain_parse( $text );
        $this->mOutput->setText( $text );
        return $this->mOutput;
    }

}
526
527 ?>