2 // +----------------------------------------------------------------------+
3 // | Copyright (c) 1997-2002 The PHP Group |
4 // +----------------------------------------------------------------------+
5 // | This source file is subject to version 2.02 of the PHP license, |
6 // | that is bundled with this package in the file LICENSE, and is |
7 // | available through the world-wide-web at |
8 // | http://www.php.net/license/3_0.txt. |
9 // | If you did not receive a copy of the PHP license and are unable to |
10 // | obtain it through the world-wide-web, please send a note to |
11 // | license@php.net so we can mail you a copy immediately. |
12 // +----------------------------------------------------------------------+
13 // | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
14 // | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more |
15 // | Authors: Many @ Sitepointforums Advanced PHP Forums |
16 // +----------------------------------------------------------------------+
// Leave this file immediately unless _ECRIRE_INC_VERSION has already been defined
// by whatever included it.
19 if (!defined('_ECRIRE_INC_VERSION')) return;
// XML_HTMLSAX3 is the path prefix used below to locate the HTMLSax3 support files.
// It is only given the value 'XML/' here when nothing defined it earlier.
21 if (!defined('XML_HTMLSAX3')) {
22 define('XML_HTMLSAX3', 'XML/');
// NOTE(review): the closing brace of the guard above is not visible in this view;
// presumably the two require_once calls below run unconditionally -- confirm.
// Presumably these files provide the XML_HTMLSax3_*State classes and the decorator
// classes that the parser classes in this file instantiate -- confirm.
24 require_once(XML_HTMLSAX3
. 'HTMLSax3/States.php');
25 require_once(XML_HTMLSAX3
. 'HTMLSax3/Decorators.php');
// Base parser extended by XML_HTMLSax3_StateParser_Lt430 and
// XML_HTMLSax3_StateParser_Gtet430. It stores, per event type, the handler object and
// the name of the method to call on it, plus the option flags read by parse().
// NOTE(review): htmlsax, State, rawtext, position, length and handler_default are
// assigned elsewhere in this file without a matching var declaration here; creating
// undeclared properties is deprecated as of PHP 8.2.
27 class XML_HTMLSax3_StateParser
{
// Element events: target object plus opening/closing method names
// (written by set_element_handler()).
29 var $handler_object_element;
30 var $handler_method_opening;
31 var $handler_method_closing;
// Data events (written by set_data_handler(), possibly wrapped by parse()).
32 var $handler_object_data;
33 var $handler_method_data;
// "pi" events (written by set_pi_handler()).
34 var $handler_object_pi;
35 var $handler_method_pi;
// "jasp" events (written by set_jasp_handler()).
36 var $handler_object_jasp;
37 var $handler_method_jasp;
// Escape events (written by set_escape_handler(), possibly wrapped by parse()).
38 var $handler_object_escape;
39 var $handler_method_escape;
// Option name => value map. Starts empty; the subclass constructors register the
// supported option names and set_option() only accepts names already present.
41 var $parser_options = array();
// PHP 4 style constructor (a method named after its class).
// Keeps a reference to the object passed as $htmlsax and fills $this->State with one
// state object per XML_HTMLSAX3_STATE_* constant; _parse() uses those constants as
// keys into $this->State to pick the state object to run.
// NOTE(review): since PHP 8.0 a method named after the class is no longer called
// automatically on "new" -- confirm which PHP versions this file must support.
// NOTE(review): some original lines inside this body are not visible in this view.
47 function XML_HTMLSax3_StateParser (& $htmlsax) {
48 $this->htmlsax
= & $htmlsax;
50 $this->State
[XML_HTMLSAX3_STATE_START
] = new XML_HTMLSax3_StartingState();
52 $this->State
[XML_HTMLSAX3_STATE_CLOSING_TAG
] = new XML_HTMLSax3_ClosingTagState();
53 $this->State
[XML_HTMLSAX3_STATE_TAG
] = new XML_HTMLSax3_TagState();
54 $this->State
[XML_HTMLSAX3_STATE_OPENING_TAG
] = new XML_HTMLSax3_OpeningTagState();
56 $this->State
[XML_HTMLSAX3_STATE_PI
] = new XML_HTMLSax3_PiState();
57 $this->State
[XML_HTMLSAX3_STATE_JASP
] = new XML_HTMLSax3_JaspState();
58 $this->State
[XML_HTMLSAX3_STATE_ESCAPE
] = new XML_HTMLSax3_EscapeState();
// Scanner helper taking no arguments; its body is not visible in this view.
// NOTE(review): presumably moves $this->position back by one character -- confirm.
61 function unscanCharacter() {
// Scanner helper taking no arguments; its body is not visible in this view.
// NOTE(review): presumably advances $this->position by one character without
// returning it -- confirm.
65 function ignoreCharacter() {
/**
 * Returns the character under the cursor and moves the cursor one position forward.
 *
 * When the cursor is already at or past the end of the text nothing is returned
 * (the caller receives NULL) and the cursor is left untouched.
 *
 * The character is read with square-bracket offset syntax: the curly-brace form
 * ($str{$i}) is deprecated since PHP 7.4 and a parse error from PHP 8.0 on, while
 * square brackets behave identically on every PHP version.
 *
 * @return string|null the scanned character, or NULL at end of input
 */
function scanCharacter() {
    if ($this->position < $this->length) {
        // Post-increment: read at the current offset, then advance.
        return $this->rawtext[$this->position++];
    }
}
/**
 * Consumes text from the cursor up to (not including) the next occurrence of
 * $string and returns the consumed text.
 *
 * Afterwards the cursor sits on the first character of the match. When $string
 * does not occur in the rest of the text, the cursor is moved to the end of the
 * text and everything that was left is returned.
 *
 * @param string $string delimiter to look for
 * @return string the text between the old and the new cursor position
 */
function scanUntilString($string) {
    $from = $this->position;
    $hit = strpos($this->rawtext, $string, $from);
    // strpos() yields FALSE when there is no further match: consume to the end.
    $this->position = ($hit === FALSE) ? $this->length : $hit;
    return substr($this->rawtext, $from, $this->position - $from);
}
// Intentionally empty in the base class: XML_HTMLSax3_StateParser_Lt430 and
// XML_HTMLSax3_StateParser_Gtet430 each supply their own implementation.
84 function scanUntilCharacters($string) {}
// Intentionally empty in the base class: XML_HTMLSax3_StateParser_Lt430 and
// XML_HTMLSax3_StateParser_Gtet430 each supply their own implementation.
86 function ignoreWhitespace() {}
// Prepares a parse run over $data.
// For every option whose value loosely equals 1, the matching handler is wrapped in a
// decorator object: the decorator receives the current handler object and method name,
// then replaces them, so the decorator's own method is what gets called.
// Finally the input is stored in $this->rawtext and its strlen() in $this->length.
// NOTE(review): the closing braces of the if-blocks and whatever follows the last
// visible statement are missing from this view.
// NOTE(review): the handler properties are overwritten on every call, so calling
// parse() twice on the same object wraps the handlers a second time -- confirm intent.
// NOTE(review): $decorator is reused across branches while still bound by reference
// (=&) to the property set in the previous branch. A later "$decorator = new ..."
// therefore also overwrites that property; for example the escape-stripper assignment
// replaces $this->handler_object_data if any data decorator ran before it. Using a
// distinct variable per branch (as $open_decor does), or unset($decorator) before
// reuse, would avoid this -- confirm and fix.
88 function parse($data) {
// Data handler: XML_HTMLSax3_Trim decorator, called through 'trimData'.
89 if ($this->parser_options
['XML_OPTION_TRIM_DATA_NODES']==1) {
90 $decorator = new XML_HTMLSax3_Trim(
91 $this->handler_object_data
,
92 $this->handler_method_data
);
93 $this->handler_object_data
=& $decorator;
94 $this->handler_method_data
= 'trimData';
// Element handler: XML_HTMLSax3_CaseFolding decorator wrapping both the opening and
// the closing method, called through 'foldOpen' / 'foldClose'.
96 if ($this->parser_options
['XML_OPTION_CASE_FOLDING']==1) {
97 $open_decor = new XML_HTMLSax3_CaseFolding(
98 $this->handler_object_element
,
99 $this->handler_method_opening
,
100 $this->handler_method_closing
);
101 $this->handler_object_element
=& $open_decor;
102 $this->handler_method_opening
='foldOpen';
103 $this->handler_method_closing
='foldClose';
// Data handler: XML_HTMLSax3_Linefeed decorator, called through 'breakData'.
105 if ($this->parser_options
['XML_OPTION_LINEFEED_BREAK']==1) {
106 $decorator = new XML_HTMLSax3_Linefeed(
107 $this->handler_object_data
,
108 $this->handler_method_data
);
109 $this->handler_object_data
=& $decorator;
110 $this->handler_method_data
= 'breakData';
// Data handler: XML_HTMLSax3_Tab decorator, called through 'breakData'.
112 if ($this->parser_options
['XML_OPTION_TAB_BREAK']==1) {
113 $decorator = new XML_HTMLSax3_Tab(
114 $this->handler_object_data
,
115 $this->handler_method_data
);
116 $this->handler_object_data
=& $decorator;
117 $this->handler_method_data
= 'breakData';
// Data handler: XML_HTMLSax3_Entities_Unparsed decorator, called through 'breakData'.
119 if ($this->parser_options
['XML_OPTION_ENTITIES_UNPARSED']==1) {
120 $decorator = new XML_HTMLSax3_Entities_Unparsed(
121 $this->handler_object_data
,
122 $this->handler_method_data
);
123 $this->handler_object_data
=& $decorator;
124 $this->handler_method_data
= 'breakData';
// Data handler: XML_HTMLSax3_Entities_Parsed decorator, called through 'breakData'.
126 if ($this->parser_options
['XML_OPTION_ENTITIES_PARSED']==1) {
127 $decorator = new XML_HTMLSax3_Entities_Parsed(
128 $this->handler_object_data
,
129 $this->handler_method_data
);
130 $this->handler_object_data
=& $decorator;
131 $this->handler_method_data
= 'breakData';
133 // Note switched on by default
// NOTE(review): both subclass constructors in this file initialise
// XML_OPTION_STRIP_ESCAPES to 0, which does not match the remark above -- confirm.
// Escape handler: XML_HTMLSax3_Escape_Stripper decorator, called through 'strip'.
134 if ($this->parser_options
['XML_OPTION_STRIP_ESCAPES']==1) {
135 $decorator = new XML_HTMLSax3_Escape_Stripper(
136 $this->handler_object_escape
,
137 $this->handler_method_escape
);
138 $this->handler_object_escape
=& $decorator;
139 $this->handler_method_escape
= 'strip';
// Store the input and its byte length for the scanning helpers.
141 $this->rawtext
= $data;
142 $this->length
= strlen($data);
/**
 * Drives the state machine.
 *
 * The state object stored under the current key is asked to parse; whatever it
 * returns becomes the key of the next state. At least one state is always run.
 * The loop ends as soon as a state returns XML_HTMLSAX3_STATE_STOP or the cursor
 * is no longer before the end of the text.
 *
 * @param mixed $state key of the state to start with
 */
function _parse($state = XML_HTMLSAX3_STATE_START) {
    while (true) {
        $state = $this->State[$state]->parse($this);
        if ($state == XML_HTMLSAX3_STATE_STOP || !($this->position < $this->length)) {
            break;
        }
    }
}
// State parser variant whose scanning helpers walk the text one character at a time
// with strpos(). The XML_HTMLSax3() constructor instantiates it as the alternative to
// XML_HTMLSax3_StateParser_Gtet430 -- presumably for PHP older than 4.3; confirm.
155 class XML_HTMLSax3_StateParser_Lt430
extends XML_HTMLSax3_StateParser
{
// PHP 4 style constructor: runs the parent constructor, then registers every
// supported option name with the value 0.
// NOTE(review): since PHP 8.0 a method named after the class is no longer called
// automatically on "new" -- confirm the supported PHP range.
156 function XML_HTMLSax3_StateParser_Lt430(& $htmlsax) {
157 parent
::XML_HTMLSax3_StateParser($htmlsax);
158 $this->parser_options
['XML_OPTION_TRIM_DATA_NODES'] = 0;
159 $this->parser_options
['XML_OPTION_CASE_FOLDING'] = 0;
160 $this->parser_options
['XML_OPTION_LINEFEED_BREAK'] = 0;
161 $this->parser_options
['XML_OPTION_TAB_BREAK'] = 0;
162 $this->parser_options
['XML_OPTION_ENTITIES_PARSED'] = 0;
163 $this->parser_options
['XML_OPTION_ENTITIES_UNPARSED'] = 0;
164 $this->parser_options
['XML_OPTION_STRIP_ESCAPES'] = 0;
// Returns the text from the cursor up to the position reached by the loop below.
// The loop condition holds while the cursor is inside the text and the character
// under it is not one of the characters in $string.
// NOTE(review): the loop body is not visible in this view; presumably it advances
// $this->position by one -- confirm.
// NOTE(review): the curly-brace string offset used here is deprecated since PHP 7.4
// and a parse error from PHP 8.0 on; square brackets are the portable form.
167 function scanUntilCharacters($string) {
168 $startpos = $this->position
;
169 while ($this->position
< $this->length
&& strpos($string, $this->rawtext
{$this->position
}) === FALSE) {
172 return substr($this->rawtext
, $startpos, $this->position
- $startpos);
// Loops while the cursor is inside the text and the character under it is a space,
// line feed, carriage return or tab.
// NOTE(review): the loop body is not visible in this view; presumably it advances
// $this->position by one -- confirm.
// NOTE(review): same curly-brace string offset concern as in scanUntilCharacters().
175 function ignoreWhitespace() {
176 while ($this->position
< $this->length
&&
177 strpos(" \n\r\t", $this->rawtext
{$this->position
}) !== FALSE) {
// Delegates to the parent implementation of parse() without adding anything.
182 function parse($data) {
183 parent
::parse($data);
/**
 * State parser variant that scans with strcspn() / strspn() and an explicit start
 * offset (an argument those functions accept since PHP 4.3.0).
 */
class XML_HTMLSax3_StateParser_Gtet430 extends XML_HTMLSax3_StateParser
{
    /**
     * PHP 4 style constructor: runs the parent constructor, then registers every
     * supported option name with the value 0.
     *
     * @param object $htmlsax object handed on to the parent constructor (by reference)
     */
    function XML_HTMLSax3_StateParser_Gtet430(& $htmlsax) {
        parent::XML_HTMLSax3_StateParser($htmlsax);
        $option_names = array(
            'XML_OPTION_TRIM_DATA_NODES',
            'XML_OPTION_CASE_FOLDING',
            'XML_OPTION_LINEFEED_BREAK',
            'XML_OPTION_TAB_BREAK',
            'XML_OPTION_ENTITIES_PARSED',
            'XML_OPTION_ENTITIES_UNPARSED',
            'XML_OPTION_STRIP_ESCAPES'
        );
        foreach ($option_names as $option_name) {
            $this->parser_options[$option_name] = 0;
        }
    }

    /**
     * Consumes and returns the run of text, starting at the cursor, that contains
     * none of the characters listed in $string.
     *
     * @param string $string set of characters that end the run
     * @return string the consumed text (may be empty)
     */
    function scanUntilCharacters($string) {
        $from = $this->position;
        $span = strcspn($this->rawtext, $string, $from);
        $this->position = $from + $span;
        return substr($this->rawtext, $from, $span);
    }

    /**
     * Moves the cursor past any run of spaces, line feeds, carriage returns and
     * tabs that starts at the current position.
     */
    function ignoreWhitespace() {
        $blank_count = strspn($this->rawtext, " \n\r\t", $this->position);
        $this->position += $blank_count;
    }

    /**
     * Delegates to the parent implementation of parse() without adding anything.
     *
     * @param string $data text to parse
     */
    function parse($data) {
        parent::parse($data);
    }
}
// Placeholder handler: the XML_HTMLSax3() constructor installs an instance of this
// class as the handler object and 'DoNothing' as the method for every event type.
214 class XML_HTMLSax3_NullHandler
{
// NOTE(review): the body is not visible in this view; presumably empty -- confirm.
215 function DoNothing() {
// PHP 4 style constructor of the front-end class (its class header lies outside this
// view). It picks a state parser implementation from the running PHP version, then
// installs an XML_HTMLSax3_NullHandler as the handler object and 'DoNothing' as the
// method for element, data, pi, jasp and escape events.
// NOTE(review): since PHP 8.0 a method named after the class is no longer called
// automatically on "new" -- confirm the supported PHP range.
222 function XML_HTMLSax3() {
223 if (version_compare(phpversion(), '4.3', 'ge')) {
224 $this->state_parser
= new XML_HTMLSax3_StateParser_Gtet430($this);
// NOTE(review): the line between the two assignments is not visible here; presumably
// an else branch, so the next assignment only runs on PHP older than 4.3 -- confirm.
226 $this->state_parser
= new XML_HTMLSax3_StateParser_Lt430($this);
// set_object() must come first: each set_*_handler() call below binds its handler
// object to the state parser's handler_default property.
228 $nullhandler = new XML_HTMLSax3_NullHandler();
229 $this->set_object($nullhandler);
230 $this->set_element_handler('DoNothing', 'DoNothing');
231 $this->set_data_handler('DoNothing');
232 $this->set_pi_handler('DoNothing');
233 $this->set_jasp_handler('DoNothing');
234 $this->set_escape_handler('DoNothing');
// Makes $object the default handler object: when it is an object, the state parser's
// handler_default property is bound to it by reference. The PEAR error path below is
// meant for non-object arguments.
// NOTE(review): the lines between the two branches (presumably a return and the else)
// are not visible in this view -- confirm.
// NOTE(review): the set_*_handler() methods bind their handler object to
// handler_default at the time they are called; rebinding handler_default here does
// not update bindings made earlier, so handlers presumably have to be registered
// again after calling set_object() -- confirm.
237 function set_object(&$object) {
238 if ( is_object($object) ) {
239 $this->state_parser
->handler_default
=& $object;
// PEAR.php is only loaded when an error actually has to be raised.
242 require_once('PEAR.php');
243 PEAR
::raiseError('XML_HTMLSax3::set_object requires '.
244 'an object instance');
// Sets parser option $name to $value (default 1). Only names that already exist as
// keys in the state parser's parser_options array are accepted; the PEAR error path
// below is meant for any other name.
// NOTE(review): the lines between the two branches (presumably a return and the else)
// are not visible in this view -- confirm.
248 function set_option($name, $value=1) {
249 if ( array_key_exists($name,$this->state_parser
->parser_options
) ) {
250 $this->state_parser
->parser_options
[$name] = $value;
// PEAR.php is only loaded when an error actually has to be raised.
253 require_once('PEAR.php');
254 PEAR
::raiseError('XML_HTMLSax3::set_option('.$name.') illegal');
/**
 * Registers the data handler.
 *
 * Stores $data_method as the data handler method name and binds the data handler
 * object, by reference, to the state parser's current handler_default.
 *
 * @param string $data_method name of the method to call on the handler object
 */
function set_data_handler($data_method) {
    $parser =& $this->state_parser;
    $parser->handler_method_data = $data_method;
    $parser->handler_object_data =& $parser->handler_default;
}
/**
 * Registers the element handler.
 *
 * Stores the opening and closing method names and binds the element handler
 * object, by reference, to the state parser's current handler_default.
 *
 * @param string $opening_method name of the method stored as the opening handler
 * @param string $closing_method name of the method stored as the closing handler
 */
function set_element_handler($opening_method, $closing_method) {
    $parser =& $this->state_parser;
    $parser->handler_method_opening = $opening_method;
    $parser->handler_method_closing = $closing_method;
    $parser->handler_object_element =& $parser->handler_default;
}
/**
 * Registers the pi handler.
 *
 * Stores $pi_method as the pi handler method name and binds the pi handler object,
 * by reference, to the state parser's current handler_default.
 *
 * @param string $pi_method name of the method to call on the handler object
 */
function set_pi_handler($pi_method) {
    $parser =& $this->state_parser;
    $parser->handler_method_pi = $pi_method;
    $parser->handler_object_pi =& $parser->handler_default;
}
/**
 * Registers the escape handler.
 *
 * Stores $escape_method as the escape handler method name and binds the escape
 * handler object, by reference, to the state parser's current handler_default.
 *
 * @param string $escape_method name of the method to call on the handler object
 */
function set_escape_handler($escape_method) {
    $parser =& $this->state_parser;
    $parser->handler_method_escape = $escape_method;
    $parser->handler_object_escape =& $parser->handler_default;
}
/**
 * Registers the jasp handler.
 *
 * Stores $jasp_method as the jasp handler method name and binds the jasp handler
 * object, by reference, to the state parser's current handler_default.
 *
 * @param string $jasp_method name of the method to call on the handler object
 */
function set_jasp_handler ($jasp_method) {
    $parser =& $this->state_parser;
    $parser->handler_method_jasp = $jasp_method;
    $parser->handler_object_jasp =& $parser->handler_default;
}
/**
 * Returns the state parser's current cursor position within the text being parsed.
 *
 * @return mixed value of the state parser's position property
 */
function get_current_position() {
    $parser =& $this->state_parser;
    return $parser->position;
}
/**
 * Returns the state parser's length property, which the state parser sets to the
 * strlen() of the data it was asked to parse.
 *
 * @return mixed value of the state parser's length property
 */
function get_length() {
    $parser =& $this->state_parser;
    return $parser->length;
}
// Public entry point: hands $data to the state parser's parse() method.
// NOTE(review): the end of this method lies beyond the visible part of the file.
292 function parse($data) {
293 $this->state_parser
->parse($data);