Tell regexp parser to use extra analysis on external link regexp;
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 require_once ( "Parser.php" ) ;
3
4 /**
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
7 * @package MediaWiki
8 * @subpackage Experimental
9 */
10
/**
 * A node of the parsed XML tree.
 *
 * $name and the keys of $attrs are compared against upper-case literals
 * ("ARTICLE", "LEVEL", ...) throughout this class. $children holds, in
 * document order, either plain strings (text) or further element objects.
 */
class element {
	var $name = '';
	var $attrs = array();
	var $children = array();

	/**
	 * Read an attribute without raising a notice when it is missing.
	 *
	 * @param string $key     Attribute name as stored in $attrs
	 * @param mixed  $default Value returned when the attribute is absent
	 * @return mixed
	 */
	function getAttr ( $key , $default = "" )
	{
		return isset ( $this->attrs[$key] ) ? $this->attrs[$key] : $default ;
	}

	/**
	 * Render all children, optionally wrapped in an XHTML tag.
	 *
	 * Text children are escaped for XHTML; element children render themselves
	 * through makeXHTML().
	 *
	 * @param mixed  $parser Passed through untouched to the children
	 * @param string $tag    Wrapper tag name, or "" for no wrapper
	 * @param string $attr   Raw attribute string for the opening tag; it is
	 *                       inserted verbatim, so the caller must escape it
	 * @return string
	 */
	function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" )
	{
		$ret = "" ;
		if ( $tag != "" )
		{
			$ret .= "<" . $tag ;
			if ( $attr != "" ) $ret .= " " . $attr ;
			$ret .= ">" ;
		}
		foreach ( $this->children as $child )
		{
			if ( is_string ( $child ) )
			{
				# Text nodes hold plain text, so "&", "<" and ">" have to be
				# turned back into entities before they go into the markup.
				$ret .= htmlspecialchars ( $child , ENT_NOQUOTES ) ;
			}
			else
			{
				$ret .= $child->makeXHTML ( $parser ) ;
			}
		}
		if ( $tag != "" )
			$ret .= "</" . $tag . ">\n" ;
		return $ret ;
	}

	/**
	 * Build the markup for an internal link. Still a stub: only the
	 * "plain title" case is recognised and a placeholder is returned.
	 *
	 * @param mixed  $parser        Unused so far
	 * @param string $target        Link target as rendered by makeInternalLink()
	 * @param string $display_title Text to show for the link
	 * @param array  $options       Remaining link options
	 * @return string Placeholder text
	 */
	function createInternalLink ( &$parser , $target , $display_title , $options )
	{
		$title = "" ;     # The plain title
		$language = "" ;  # The language/meta/etc. part
		$namespace = "" ; # The namespace, if any

		if ( strpos ( $target , ":" ) === false )
		{
			$title = $target ; # Plain and simple case
		}
		# else: splitting "language:namespace:title#anchor" is to be implemented

		if ( $language != "" ) # External link within the WikiMedia project
			return "{language link}" ;
		if ( $namespace != "" ) # Link to another namespace, check for image/media stuff
			return "{namespace link}" ;
		return "{internal link}" ;
	}

	/**
	 * Render a LINK element: the LINKTARGET child is the target, every other
	 * element child is an option, and the last option is the display title
	 * (falling back to the target when there are no options).
	 *
	 * @param mixed $parser Passed through to the children
	 * @return string
	 */
	function makeInternalLink ( &$parser )
	{
		$target = "" ;
		$option = array () ;
		foreach ( $this->children as $child )
		{
			if ( is_string ( $child ) ) continue ; # This shouldn't be the case!

			$rendered = trim ( $child->makeXHTML ( $parser ) ) ;
			if ( $child->name == "LINKTARGET" )
				$target = $rendered ;
			else
				$option[] = $rendered ;
		}

		if ( count ( $option ) == 0 ) $option[] = $target ; # Create dummy display title
		$display_title = array_pop ( $option ) ;
		return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
	}

	/**
	 * Render this element and its subtree as XHTML.
	 *
	 * The result starts and ends with a newline, with doubled newlines
	 * collapsed once.
	 *
	 * @param mixed $parser Passed through to the children
	 * @return string
	 */
	function makeXHTML ( &$parser )
	{
		$n = $this->name ; # Shortcut

		# Elements that map directly onto one XHTML tag ("" = no wrapper)
		$simple = array (
			"ARTICLE" => "" ,
			"PARAGRAPH" => "p" ,
			"BOLD" => "strong" ,
			"ITALICS" => "em" ,
			"LINKTARGET" => "" ,
			"LINKOPTION" => "" ,
			"TABLE" => "table" ,
			"TABLEROW" => "tr" ,
			"TABLECELL" => "td" ,
			"LISTITEM" => "li"
		) ;

		if ( isset ( $simple[$n] ) )
		{
			$ret = $this->sub_makeXHTML ( $parser , $simple[$n] ) ;
		}
		else if ( $n == "HEADING" )
		{
			# The level ends up inside the tag name, so force it to a number
			# between 1 and 6; a missing or bogus level becomes 1.
			$level = max ( 1 , min ( 6 , intval ( $this->getAttr ( "LEVEL" , 1 ) ) ) ) ;
			$ret = $this->sub_makeXHTML ( $parser , "h" . $level ) ;
		}
		else if ( $n == "LINK" )
		{
			$ret = $this->makeInternalLink ( $parser ) ;
		}
		else if ( $n == "LIST" )
		{
			# Anything that is not a bullet list is rendered as a numbered one
			$type = ( $this->getAttr ( "TYPE" ) == "bullet" ) ? "ul" : "ol" ;
			$ret = $this->sub_makeXHTML ( $parser , $type ) ;
		}
		else
		{
			# EXTENSION (currently a dummy!!!) and unknown elements are shown
			# as a visible, escaped pseudo tag around their content.
			$label = ( $n == "EXTENSION" ) ? $this->getAttr ( "NAME" ) : $n ;
			$label = htmlspecialchars ( $label , ENT_NOQUOTES ) ;
			$ret = "&lt;" . $label . "&gt;" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "&lt;/" . $label . "&gt; " ;
		}

		$ret = "\n{$ret}\n" ;
		$ret = str_replace ( "\n\n" , "\n" , $ret ) ;
		return $ret ;
	}

	/**
	 * Debug dump of the subtree as nested HTML lists.
	 *
	 * Names, attributes and text are escaped so the dump shows them literally.
	 *
	 * @return string
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> " . htmlspecialchars ( $this->name , ENT_NOQUOTES ) . " </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ( $this->attrs as $name => $value ) {
			$ret .= htmlspecialchars ( "$name => $value; " , ENT_NOQUOTES ) ;
		}
		$ret .= " </li>\n";
		// print children
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				$ret .= "<li> " . htmlspecialchars ( $child , ENT_NOQUOTES ) . " </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
173
// Stack of the elements that are currently open (innermost one last)
$ancStack = array();

// The XML extension wants plain callbacks, hence these three global functions

/**
 * Start-tag callback: create a node for the tag and put it on top of the
 * stack of open elements.
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $name   Element name as reported by the XML parser
 * @param array  $attrs  Attributes as reported by the XML parser
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$node = new element;
	$node->attrs = $attrs;
	$node->name = $name;

	$ancStack[] = $node;
}
186
/**
 * End-tag callback: take the finished element off the stack and either hang
 * it under its parent or, when nothing is left on the stack, remember it in
 * $rootElem.
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $name   Element name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$finished = array_pop ($ancStack);
	$open = count ($ancStack);

	if ($open != 0) {
		// the new top of the stack is the parent of the finished element
		array_push ($ancStack[$open - 1]->children, $finished);
		return;
	}

	// nothing left on the stack: this was the outermost element
	$rootElem = $finished;
}
197
/**
 * Character-data callback: attach text to the innermost open element.
 *
 * The XML parser may hand over one run of text in several pieces (for
 * instance around an entity such as &amp;). Trimming every piece on its own
 * would glue words together ("cool & stuff" -> "cool&stuff"), so a piece that
 * continues a text child is appended to it unchanged. Only a piece that would
 * start a new text child is dropped when it is nothing but whitespace -
 * blank lines between elements are no use.
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $data   Text chunk delivered by the XML parser
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// text outside of any element has no parent to go to
	if ( !$ancStack ) {
		return;
	}

	$top = count ($ancStack) - 1;
	$last = count ($ancStack[$top]->children) - 1;

	if ( $last >= 0 && is_string ($ancStack[$top]->children[$last]) ) {
		// continuation of the previous text chunk: keep it verbatim
		$ancStack[$top]->children[$last] .= $data;
	} else if ( trim ($data) != "" ) {
		array_push ($ancStack[$top]->children, $data);
	}
}
206
207
/**
 * Here's the class that generates a nice tree.
 *
 * Both scan methods feed XML to PHP's XML parser with the three global
 * wgXML* callbacks installed; the callbacks build the tree on the global
 * $ancStack and leave the outermost element in the global $rootElem.
 * Any failure ends the script through die().
 */
class xml2php {

	/**
	 * Parse an XML file into an element tree.
	 *
	 * @param string $filename Path of the XML file
	 * @return mixed Root element of the tree (returned by reference)
	 */
	function &scanFile( $filename ) {
		global $ancStack, $rootElem;
		$ancStack = array();
		$rootElem = null; // never hand back the tree of an earlier scan

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');
		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}

		// Feed the file in 4K pieces. The loop ends on an empty read or on
		// EOF and not on a "falsy" chunk (a lone "0" is valid data), and the
		// parser is always told which piece is the last one. An empty file
		// therefore fails with an XML error, just like scanString('').
		do {
			$data = fread($fp, 4096);
			if ($data === false) {
				$data = '';
			}
			$final = ($data === '') || feof($fp);
			if (!xml_parse($xml_parser, $data, $final)) {
				$message = sprintf("XML error: %s at line %d",
					xml_error_string(xml_get_error_code($xml_parser)),
					xml_get_current_line_number($xml_parser));
				fclose($fp);
				xml_parser_free($xml_parser);
				die($message);
			}
		} while (!$final);

		fclose($fp);
		xml_parser_free($xml_parser);

		// the end-element callback stored the outermost element here
		return $rootElem;
	}

	/**
	 * Parse an XML string into an element tree.
	 *
	 * @param string $input Complete XML document
	 * @return mixed Root element of the tree
	 */
	function scanString ( $input ) {
		global $ancStack, $rootElem;
		$ancStack = array();
		$rootElem = null; // never hand back the tree of an earlier scan

		$xml_parser = xml_parser_create();
		xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');

		if (!xml_parse ($xml_parser, $input, true)) {
			$message = sprintf ("XML error: %s at line %d",
				xml_error_string(xml_get_error_code($xml_parser)),
				xml_get_current_line_number($xml_parser));
			xml_parser_free ($xml_parser);
			die ($message);
		}
		xml_parser_free ($xml_parser);

		// the end-element callback stored the outermost element here
		return $rootElem;
	}

}
256
257 /* Example code:
258
259 $w = new xml2php;
260 $filename = 'sample.xml';
261 $result = $w->scanFile( $filename );
262 print $result->myPrint();
263 */
264
# Hard-wired sample document; ParserXML::parse() renders this instead of its input for now
$dummytext = "<article><heading level='2'> R-type </heading><paragraph><link><linktarget>image:a.jpg</linktarget><linkoption>1</linkoption><linkoption>2</linkoption><linkoption>3</linkoption><linkoption>text</linkoption></link></paragraph><paragraph>The <link><linktarget>video game</linktarget><linkoption>computer game</linkoption></link> <bold>R-type</bold> is <extension name='nowiki'>cool &amp; stuff</extension> because:</paragraph><list type='bullet'><listitem>it's nice</listitem><listitem>it's fast</listitem><listitem>it has:<list type='bullet'><listitem>graphics</listitem><listitem>sound</listitem></list></listitem></list><table><tablerow><tablecell>Version 1 </tablecell><tablecell>not bad</tablecell></tablerow><tablerow><tablecell>Version 2 </tablecell><tablecell>much better </tablecell></tablerow></table><paragraph>This is a || token in the middle of text.</paragraph></article>" ;

/**
 * Experimental parser that renders an XML article tree as (X)HTML.
 */
class ParserXML EXTENDS Parser
{
	/**#@+
	 * @access private
	 */
	# Persistent:
	var $mTagHooks;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType,
		$mTemplates,	// cache of already loaded templates, avoids
				// multiple SQL queries for the same string
		$mTemplatePath;	// stores an unsorted hash of all the templates already loaded
				// in this path. Used for loop detection.

	/**#@-*/

	/**
	 * Constructor
	 *
	 * Declared under the unified name so that the set-up also runs on PHP
	 * versions that no longer treat a method named after the class as the
	 * constructor.
	 *
	 * @access public
	 */
	function __construct() {
		$this->mTemplates = array();
		$this->mTemplatePath = array();
		$this->mTagHooks = array();
		$this->clearState();
	}

	/**
	 * Old-style constructor, kept for PHP 4 and for callers that invoke it
	 * by name.
	 *
	 * @access public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * @access private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array();
		$this->mStripState = array();
		$this->mArgStack = array();
		$this->mInPre = false;
	}

	/**
	 * Experimental stand-in for the real parse: the incoming $text is
	 * ignored and the global $dummytext sample is rendered instead. The
	 * output text is the raw sample, its XHTML rendering and a debug dump
	 * of the tree, separated by <hr>.
	 *
	 * @param string $text       Ignored for now
	 * @param mixed  $title      Unused
	 * @param mixed  $options    Unused
	 * @param bool   $linestart  Unused
	 * @param bool   $clearState Unused
	 * @return mixed The output object held in $this->mOutput
	 */
	function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
		global $dummytext ;
		$text = $dummytext ;

		$w = new xml2php;
		$result = $w->scanString( $text );
		$text .= "<hr>" . $result->makeXHTML ( $this );
		$text .= "<hr>" . $result->myPrint();

		$this->mOutput->setText ( $text ) ;
		return $this->mOutput;
	}

}
331
332 ?>