3f46453d7b201908951fa8c043d290b5f4b161fc
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
1 <?php
2 /**
3 * JavaScript Minifier
4 *
5 * @file
6 * @author Paul Copperman <paul.copperman@gmail.com>
7 * @license Choose any of Apache, MIT, GPL, LGPL
8 */
9
/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is scanned token by token. Whitespace and comments are dropped, a small
	 * state machine decides whether a slash starts a regexp literal or is a division
	 * operator, and newlines are kept where removing them would change the effect of
	 * JavaScript's automatic semicolon insertion. The literals `true` and `false` are
	 * shortened to `!0` and `!1` in expression context.
	 *
	 * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
	 *       literals (e.g. quoted strings) longer than $maxLineLength are encountered
	 *       or when required to guard against semicolon insertion.
	 *
	 * @param string $s JavaScript code to minify
	 * @param int $maxLineLength Maximum length of a single line, or -1 for no maximum.
	 * @return string|false Minified code, or false (the result of parseError()) when a
	 *  hexadecimal or decimal numeric literal is malformed.
	 */
	public static function minify( $s, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = [
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		];

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = [
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		];

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = [
			self::STATEMENT => [
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			]
		];

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = [
			self::STATEMENT => [
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::STATEMENT
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			],
			self::EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			]
		];

		// $pop : Rules for when to pop a state from the stack
		$pop = [
			self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ],
			self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ],
			self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ],
			self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ]
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			]
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					// The offset is bounded by $length so that strcspn() is never asked
					// to start beyond the end of the input (it then simply yields 0).
					$end += strcspn( $s, $search, min( $end, $length ) ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', min( $end, $length ) ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', min( $end, $length ) ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// An unterminated string that ends in a backslash, or an unterminated regexp
			// character class, can leave $end one past the end of the input even after the
			// corrections above. Clamp it so that $s[$end - 1] below is always a valid offset.
			if ( $end > $length ) {
				$end = $length;
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			// Shorten the boolean literals. This is decided before the line-length check so
			// that the check and the running line length use the number of bytes actually
			// written. The semicolon insertion branch below only fires in states where this
			// substitution never applies, so doing it first does not change which tokens
			// are substituted.
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}
			$tokenLength = strlen( $token );

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $tokenLength > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $tokenLength;
			// $last is the final character of the token as it appeared in the input
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}
		}
		return $out;
	}

	/**
	 * Reports a parse error found by minify().
	 *
	 * Currently the arguments are ignored and false is returned, which minify() passes
	 * on to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript at which the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}