6 * @author Paul Copperman <paul.copperman@gmail.com>
9 * @license GPL-2.0-or-later
10 * @license LGPL-2.1-or-later
14 * This class is meant to safely minify javascript code, while leaving syntactically correct
15 * programs intact. Other libraries, such as JSMin require a certain coding style to work
16 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
17 * slow, because they construct a complete parse tree before outputting the code minified.
18 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
19 * fast enough to be used for on-the-fly minifying.
21 * This class was written with ECMA-262 Edition 3 in mind ("ECMAScript 3"). Parsing features
22 * new to ECMAScript 5 or later might not be supported. However, Edition 5.1 better reflects
23 * how actual JS engines worked and work and is simpler and more readable prose. As such,
24 * the below code will refer to sections of the 5.1 specification.
26 * See <https://www.ecma-international.org/ecma-262/5.1/>.
28 class JavaScriptMinifier
{
31 * The state machine is only necessary to decide whether to parse a slash as division
32 * operator or as regexp literal.
33 * States are named after the next expected item. We only distinguish states when the
34 * distinction is relevant for our purpose.
38 const PROPERTY_ASSIGNMENT
= 2;
40 const EXPRESSION_NO_NL
= 4; // only relevant for semicolon insertion
41 const EXPRESSION_OP
= 5;
42 const EXPRESSION_FUNC
= 6;
43 const EXPRESSION_TERNARY
= 7; // used to determine the role of a colon
44 const EXPRESSION_TERNARY_OP
= 8;
45 const EXPRESSION_TERNARY_FUNC
= 9;
46 const PAREN_EXPRESSION
= 10; // expression which is not on the top level
47 const PAREN_EXPRESSION_OP
= 11;
48 const PAREN_EXPRESSION_FUNC
= 12;
49 const PROPERTY_EXPRESSION
= 13; // expression which is within an object literal
50 const PROPERTY_EXPRESSION_OP
= 14;
51 const PROPERTY_EXPRESSION_FUNC
= 15;
54 const TYPE_UN_OP
= 101; // unary operators
55 const TYPE_INCR_OP
= 102; // ++ and --
56 const TYPE_BIN_OP
= 103; // binary operators
57 const TYPE_ADD_OP
= 104; // + and - which can be either unary or binary ops
58 const TYPE_HOOK
= 105; // ?
59 const TYPE_COLON
= 106; // :
60 const TYPE_COMMA
= 107; // ,
61 const TYPE_SEMICOLON
= 108; // ;
62 const TYPE_BRACE_OPEN
= 109; // {
63 const TYPE_BRACE_CLOSE
= 110; // }
64 const TYPE_PAREN_OPEN
= 111; // ( and [
65 const TYPE_PAREN_CLOSE
= 112; // ) and ]
66 const TYPE_RETURN
= 113; // keywords: break, continue, return, throw
67 const TYPE_IF
= 114; // keywords: catch, for, with, switch, while, if
68 const TYPE_DO
= 115; // keywords: case, var, finally, else, do, try
69 const TYPE_FUNC
= 116; // keywords: function
70 const TYPE_LITERAL
= 117; // all literals, identifiers and unrecognised tokens
72 const ACTION_GOTO
= 201;
74 // Sanity limit to avoid excessive memory usage
75 const STACK_LIMIT
= 1000;
80 * This is not a strict maximum, but a guideline. Longer lines will be
81 * produced when literals (e.g. quoted strings) longer than this are
82 * encountered, or when required to guard against semicolon insertion.
84 * This is a private member (instead of constant) to allow tests to
85 * set it to 1, to verify ASI and line-breaking behaviour.
87 private static $maxLineLength = 1000;
90 * Returns minified JavaScript code.
92 * @param string $s JavaScript code to minify
93 * @return string Minified code
95 public static function minify( $s ) {
96 // First we declare a few tables that contain our parsing rules
98 // $opChars : Characters which can be combined without whitespace between them.
100 // ECMAScript 5.1 § 7.7 Punctuators
101 // Unlike the spec, these are individual symbols, not sequences.
126 // ECMAScript 5.1 § 7.8.4 String Literals
131 // $tokenTypes : Map keywords and operators to their corresponding token type
133 // ECMAScript 5.1 § 11.4 Unary Operators
134 // ECMAScript 5.1 § 11.6 Additive Operators
135 // UnaryExpression includes PostfixExpression, which includes 'new'.
136 'new' => self
::TYPE_UN_OP
,
137 'delete' => self
::TYPE_UN_OP
,
138 'void' => self
::TYPE_UN_OP
,
139 'typeof' => self
::TYPE_UN_OP
,
140 '++' => self
::TYPE_INCR_OP
,
141 '--' => self
::TYPE_INCR_OP
,
142 '+' => self
::TYPE_ADD_OP
,
143 '-' => self
::TYPE_ADD_OP
,
144 '~' => self
::TYPE_UN_OP
,
145 '!' => self
::TYPE_UN_OP
,
146 // ECMAScript 5.1 § 11.5 Multiplicative Operators
147 '*' => self
::TYPE_BIN_OP
,
148 '/' => self
::TYPE_BIN_OP
,
149 '%' => self
::TYPE_BIN_OP
,
150 // ECMAScript 5.1 § 11.7 Bitwise Shift Operators
151 '<<' => self
::TYPE_BIN_OP
,
152 '>>' => self
::TYPE_BIN_OP
,
153 '>>>' => self
::TYPE_BIN_OP
,
154 // ECMAScript 5.1 § 11.8 Relational Operators
155 '<' => self
::TYPE_BIN_OP
,
156 '>' => self
::TYPE_BIN_OP
,
157 '<=' => self
::TYPE_BIN_OP
,
158 '>=' => self
::TYPE_BIN_OP
,
159 // ECMAScript 5.1 § 11.9 Equality Operators
160 '==' => self
::TYPE_BIN_OP
,
161 '!=' => self
::TYPE_BIN_OP
,
162 '===' => self
::TYPE_BIN_OP
,
163 '!==' => self
::TYPE_BIN_OP
,
164 'instanceof' => self
::TYPE_BIN_OP
,
165 'in' => self
::TYPE_BIN_OP
,
166 // ECMAScript 5.1 § 11.10 Binary Bitwise Operators
167 '&' => self
::TYPE_BIN_OP
,
168 '^' => self
::TYPE_BIN_OP
,
169 '|' => self
::TYPE_BIN_OP
,
170 // ECMAScript 5.1 § 11.11 Binary Logical Operators
171 '&&' => self
::TYPE_BIN_OP
,
172 '||' => self
::TYPE_BIN_OP
,
173 // ECMAScript 5.1 § 11.12 Conditional Operator
174 // Also known as ternary.
175 '?' => self
::TYPE_HOOK
,
176 ':' => self
::TYPE_COLON
,
177 // ECMAScript 5.1 § 11.13 Assignment Operators
178 '=' => self
::TYPE_BIN_OP
,
179 '*=' => self
::TYPE_BIN_OP
,
180 '/=' => self
::TYPE_BIN_OP
,
181 '%=' => self
::TYPE_BIN_OP
,
182 '+=' => self
::TYPE_BIN_OP
,
183 '-=' => self
::TYPE_BIN_OP
,
184 '<<=' => self
::TYPE_BIN_OP
,
185 '>>=' => self
::TYPE_BIN_OP
,
186 '>>>=' => self
::TYPE_BIN_OP
,
187 '&=' => self
::TYPE_BIN_OP
,
188 '^=' => self
::TYPE_BIN_OP
,
189 '|=' => self
::TYPE_BIN_OP
,
190 // ECMAScript 5.1 § 11.14 Comma Operator
191 ',' => self
::TYPE_COMMA
,
193 // The keywords that disallow LineTerminator before their
194 // (sometimes optional) Expression or Identifier.
197 // keyword [no LineTerminator here] Identifier ;
198 // keyword [no LineTerminator here] Expression ;
200 // See also ECMAScript 5.1:
201 // - § 12.7 The continue Statement
202 // - § 12.8 The break Statement
203 // - § 12.9 The return Statement
204 // - § 12.13 The throw Statement
205 'continue' => self
::TYPE_RETURN
,
206 'break' => self
::TYPE_RETURN
,
207 'return' => self
::TYPE_RETURN
,
208 'throw' => self
::TYPE_RETURN
,
210 // The keywords require a parenthesised Expression or Identifier
211 // before the next Statement.
213 // keyword ( Expression ) Statement
214 // keyword ( Identifier ) Statement
216 // See also ECMAScript 5.1:
217 // - § 12.5 The if Statement
218 // - § 12.6 Iteration Statements (do, while, for)
219 // - § 12.10 The with Statement
220 // - § 12.11 The switch Statement
221 // - § 12.14 The try Statement (catch)
222 'if' => self
::TYPE_IF
,
223 'catch' => self
::TYPE_IF
,
224 'while' => self
::TYPE_IF
,
225 'for' => self
::TYPE_IF
,
226 'switch' => self
::TYPE_IF
,
227 'with' => self
::TYPE_IF
,
229 // The keywords followed by an Identifier, Statement,
230 // Expression, or Block.
239 // See also ECMAScript 5.1:
240 // - § 12.2 Variable Statement
241 // - § 12.5 The if Statement (else)
242 // - § 12.6 Iteration Statements (do, while, for)
243 // - § 12.11 The switch Statement (case)
244 // - § 12.14 The try Statement
245 'var' => self
::TYPE_DO
,
246 'else' => self
::TYPE_DO
,
247 'do' => self
::TYPE_DO
,
248 'case' => self
::TYPE_DO
,
249 'try' => self
::TYPE_DO
,
250 'finally' => self
::TYPE_DO
,
252 // ECMAScript 5.1 § 13 Function Definition
253 'function' => self
::TYPE_FUNC
,
256 // - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
257 // - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
258 '.' => self
::TYPE_BIN_OP
,
261 // - Block (ECMAScript 5.1 § 12.1 Block)
262 // - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
263 '{' => self
::TYPE_BRACE_OPEN
,
264 '}' => self
::TYPE_BRACE_CLOSE
,
267 // - Parenthesised Identifier or Expression after a
268 // TYPE_IF or TYPE_FUNC keyword.
269 // - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
270 // - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
271 '(' => self
::TYPE_PAREN_OPEN
,
272 ')' => self
::TYPE_PAREN_CLOSE
,
275 // - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
276 '[' => self
::TYPE_PAREN_OPEN
,
277 ']' => self
::TYPE_PAREN_CLOSE
,
280 // - End of any statement
281 // - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
282 ';' => self
::TYPE_SEMICOLON
,
285 // $model : This is the main table for our state machine. For every state/token pair
286 // the desired action is defined.
288 // Statement - This is the initial state.
290 self
::TYPE_UN_OP
=> [
291 self
::ACTION_GOTO
=> self
::EXPRESSION
,
293 self
::TYPE_INCR_OP
=> [
294 self
::ACTION_GOTO
=> self
::EXPRESSION
,
296 self
::TYPE_ADD_OP
=> [
297 self
::ACTION_GOTO
=> self
::EXPRESSION
,
299 self
::TYPE_PAREN_OPEN
=> [
300 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
302 self
::TYPE_RETURN
=> [
303 self
::ACTION_GOTO
=> self
::EXPRESSION_NO_NL
,
306 self
::ACTION_GOTO
=> self
::CONDITION
,
309 self
::ACTION_GOTO
=> self
::CONDITION
,
311 self
::TYPE_LITERAL
=> [
312 self
::ACTION_GOTO
=> self
::EXPRESSION_OP
,
316 self
::TYPE_PAREN_OPEN
=> [
317 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
320 self
::PROPERTY_ASSIGNMENT
=> [
321 self
::TYPE_COLON
=> [
322 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION
,
324 self
::TYPE_BRACE_OPEN
=> [
325 self
::ACTION_GOTO
=> self
::STATEMENT
,
328 self
::EXPRESSION
=> [
329 self
::TYPE_SEMICOLON
=> [
330 self
::ACTION_GOTO
=> self
::STATEMENT
,
332 self
::TYPE_BRACE_OPEN
=> [
333 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
335 self
::TYPE_PAREN_OPEN
=> [
336 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
339 self
::ACTION_GOTO
=> self
::EXPRESSION_FUNC
,
341 self
::TYPE_LITERAL
=> [
342 self
::ACTION_GOTO
=> self
::EXPRESSION_OP
,
345 self
::EXPRESSION_NO_NL
=> [
346 self
::TYPE_SEMICOLON
=> [
347 self
::ACTION_GOTO
=> self
::STATEMENT
,
349 self
::TYPE_BRACE_OPEN
=> [
350 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
352 self
::TYPE_PAREN_OPEN
=> [
353 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
356 self
::ACTION_GOTO
=> self
::EXPRESSION_FUNC
,
358 self
::TYPE_LITERAL
=> [
359 self
::ACTION_GOTO
=> self
::EXPRESSION_OP
,
362 self
::EXPRESSION_OP
=> [
363 self
::TYPE_BIN_OP
=> [
364 self
::ACTION_GOTO
=> self
::EXPRESSION
,
366 self
::TYPE_ADD_OP
=> [
367 self
::ACTION_GOTO
=> self
::EXPRESSION
,
370 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY
,
372 self
::TYPE_COLON
=> [
373 self
::ACTION_GOTO
=> self
::STATEMENT
,
375 self
::TYPE_COMMA
=> [
376 self
::ACTION_GOTO
=> self
::EXPRESSION
,
378 self
::TYPE_SEMICOLON
=> [
379 self
::ACTION_GOTO
=> self
::STATEMENT
,
381 self
::TYPE_PAREN_OPEN
=> [
382 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
385 self
::EXPRESSION_FUNC
=> [
386 self
::TYPE_BRACE_OPEN
=> [
387 self
::ACTION_GOTO
=> self
::STATEMENT
,
390 self
::EXPRESSION_TERNARY
=> [
391 self
::TYPE_BRACE_OPEN
=> [
392 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
394 self
::TYPE_PAREN_OPEN
=> [
395 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
398 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY_FUNC
,
400 self
::TYPE_LITERAL
=> [
401 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY_OP
,
404 self
::EXPRESSION_TERNARY_OP
=> [
405 self
::TYPE_BIN_OP
=> [
406 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY
,
408 self
::TYPE_ADD_OP
=> [
409 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY
,
412 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY
,
414 self
::TYPE_COMMA
=> [
415 self
::ACTION_GOTO
=> self
::EXPRESSION_TERNARY
,
417 self
::TYPE_PAREN_OPEN
=> [
418 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
421 self
::EXPRESSION_TERNARY_FUNC
=> [
422 self
::TYPE_BRACE_OPEN
=> [
423 self
::ACTION_GOTO
=> self
::STATEMENT
,
426 self
::PAREN_EXPRESSION
=> [
427 self
::TYPE_BRACE_OPEN
=> [
428 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
430 self
::TYPE_PAREN_OPEN
=> [
431 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
434 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION_FUNC
,
436 self
::TYPE_LITERAL
=> [
437 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION_OP
,
440 self
::PAREN_EXPRESSION_OP
=> [
441 self
::TYPE_BIN_OP
=> [
442 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
444 self
::TYPE_ADD_OP
=> [
445 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
448 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
450 self
::TYPE_COLON
=> [
451 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
453 self
::TYPE_COMMA
=> [
454 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
456 self
::TYPE_SEMICOLON
=> [
457 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
459 self
::TYPE_PAREN_OPEN
=> [
460 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
463 self
::PAREN_EXPRESSION_FUNC
=> [
464 self
::TYPE_BRACE_OPEN
=> [
465 self
::ACTION_GOTO
=> self
::STATEMENT
,
468 self
::PROPERTY_EXPRESSION
=> [
469 self
::TYPE_BRACE_OPEN
=> [
470 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
472 self
::TYPE_PAREN_OPEN
=> [
473 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
476 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION_FUNC
,
478 self
::TYPE_LITERAL
=> [
479 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION_OP
,
482 self
::PROPERTY_EXPRESSION_OP
=> [
483 self
::TYPE_BIN_OP
=> [
484 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION
,
486 self
::TYPE_ADD_OP
=> [
487 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION
,
490 self
::ACTION_GOTO
=> self
::PROPERTY_EXPRESSION
,
492 self
::TYPE_COMMA
=> [
493 self
::ACTION_GOTO
=> self
::PROPERTY_ASSIGNMENT
,
495 self
::TYPE_PAREN_OPEN
=> [
496 self
::ACTION_GOTO
=> self
::PAREN_EXPRESSION
,
499 self
::PROPERTY_EXPRESSION_FUNC
=> [
500 self
::TYPE_BRACE_OPEN
=> [
501 self
::ACTION_GOTO
=> self
::STATEMENT
,
506 // $push : This table contains the rules for when to push a state onto the stack.
507 // The pushed state is the state to return to when the corresponding
508 // closing token is found
511 self
::TYPE_BRACE_OPEN
=> self
::STATEMENT
,
512 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_OP
515 self
::TYPE_PAREN_OPEN
=> self
::STATEMENT
517 self
::PROPERTY_ASSIGNMENT
=> [
518 self
::TYPE_BRACE_OPEN
=> self
::PROPERTY_ASSIGNMENT
520 self
::EXPRESSION
=> [
521 self
::TYPE_BRACE_OPEN
=> self
::EXPRESSION_OP
,
522 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_OP
524 self
::EXPRESSION_NO_NL
=> [
525 self
::TYPE_BRACE_OPEN
=> self
::EXPRESSION_OP
,
526 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_OP
528 self
::EXPRESSION_OP
=> [
529 self
::TYPE_HOOK
=> self
::EXPRESSION
,
530 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_OP
532 self
::EXPRESSION_FUNC
=> [
533 self
::TYPE_BRACE_OPEN
=> self
::EXPRESSION_OP
535 self
::EXPRESSION_TERNARY
=> [
536 self
::TYPE_BRACE_OPEN
=> self
::EXPRESSION_TERNARY_OP
,
537 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_TERNARY_OP
539 self
::EXPRESSION_TERNARY_OP
=> [
540 self
::TYPE_HOOK
=> self
::EXPRESSION_TERNARY
,
541 self
::TYPE_PAREN_OPEN
=> self
::EXPRESSION_TERNARY_OP
543 self
::EXPRESSION_TERNARY_FUNC
=> [
544 self
::TYPE_BRACE_OPEN
=> self
::EXPRESSION_TERNARY_OP
546 self
::PAREN_EXPRESSION
=> [
547 self
::TYPE_BRACE_OPEN
=> self
::PAREN_EXPRESSION_OP
,
548 self
::TYPE_PAREN_OPEN
=> self
::PAREN_EXPRESSION_OP
550 self
::PAREN_EXPRESSION_OP
=> [
551 self
::TYPE_PAREN_OPEN
=> self
::PAREN_EXPRESSION_OP
553 self
::PAREN_EXPRESSION_FUNC
=> [
554 self
::TYPE_BRACE_OPEN
=> self
::PAREN_EXPRESSION_OP
556 self
::PROPERTY_EXPRESSION
=> [
557 self
::TYPE_BRACE_OPEN
=> self
::PROPERTY_EXPRESSION_OP
,
558 self
::TYPE_PAREN_OPEN
=> self
::PROPERTY_EXPRESSION_OP
560 self
::PROPERTY_EXPRESSION_OP
=> [
561 self
::TYPE_BRACE_OPEN
=> self
::PROPERTY_EXPRESSION_OP
,
562 self
::TYPE_PAREN_OPEN
=> self
::PROPERTY_EXPRESSION_OP
564 self
::PROPERTY_EXPRESSION_FUNC
=> [
565 self
::TYPE_BRACE_OPEN
=> self
::PROPERTY_EXPRESSION_OP
569 // $pop : Rules for when to pop a state from the stack
571 self
::STATEMENT
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
572 self
::PROPERTY_ASSIGNMENT
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
573 self
::EXPRESSION
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
574 self
::EXPRESSION_NO_NL
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
575 self
::EXPRESSION_OP
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
576 self
::EXPRESSION_TERNARY_OP
=> [ self
::TYPE_COLON
=> true ],
577 self
::PAREN_EXPRESSION
=> [ self
::TYPE_PAREN_CLOSE
=> true ],
578 self
::PAREN_EXPRESSION_OP
=> [ self
::TYPE_PAREN_CLOSE
=> true ],
579 self
::PROPERTY_EXPRESSION
=> [ self
::TYPE_BRACE_CLOSE
=> true ],
580 self
::PROPERTY_EXPRESSION_OP
=> [ self
::TYPE_BRACE_CLOSE
=> true ]
583 // $semicolon : Rules for when a semicolon insertion is appropriate
585 self
::EXPRESSION_NO_NL
=> [
586 self
::TYPE_UN_OP
=> true,
587 self
::TYPE_INCR_OP
=> true,
588 self
::TYPE_ADD_OP
=> true,
589 self
::TYPE_BRACE_OPEN
=> true,
590 self
::TYPE_PAREN_OPEN
=> true,
591 self
::TYPE_RETURN
=> true,
592 self
::TYPE_IF
=> true,
593 self
::TYPE_DO
=> true,
594 self
::TYPE_FUNC
=> true,
595 self
::TYPE_LITERAL
=> true
597 self
::EXPRESSION_OP
=> [
598 self
::TYPE_UN_OP
=> true,
599 self
::TYPE_INCR_OP
=> true,
600 self
::TYPE_BRACE_OPEN
=> true,
601 self
::TYPE_RETURN
=> true,
602 self
::TYPE_IF
=> true,
603 self
::TYPE_DO
=> true,
604 self
::TYPE_FUNC
=> true,
605 self
::TYPE_LITERAL
=> true
609 // $divStates : Contains all states that can be followed by a division operator
611 self
::EXPRESSION_OP
=> true,
612 self
::EXPRESSION_TERNARY_OP
=> true,
613 self
::PAREN_EXPRESSION_OP
=> true,
614 self
::PROPERTY_EXPRESSION_OP
=> true
617 // Here's where the minifying takes place: Loop through the input, looking for tokens
618 // and output them to $out, taking actions to the above defined rules when appropriate.
621 $length = strlen( $s );
623 $newlineFound = true;
624 $state = self
::STATEMENT
;
626 $last = ';'; // Pretend that we have seen a semicolon yet
627 while ( $pos < $length ) {
628 // First, skip over any whitespace and multiline comments, recording whether we
629 // found any newline character
630 $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
633 if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
634 // Multiline comment. Search for the end token or EOT.
635 $end = strpos( $s, '*/', $pos +
2 );
636 $skip = $end === false ?
$length - $pos : $end - $pos +
2;
640 // The semicolon insertion mechanism needs to know whether there was a newline
641 // between two tokens, so record it now.
642 if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
643 $newlineFound = true;
648 // Handle C++-style comments and html comments, which are treated as single line
649 // comments by the browser, regardless of whether the end tag is on the same line.
650 // Handle --> the same way, but only if it's at the beginning of the line
651 if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
652 ||
( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
653 ||
( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
655 $pos +
= strcspn( $s, "\r\n", $pos );
659 // Find out which kind of token we're handling.
660 // Note: $end must point past the end of the current token
661 // so that `substr($s, $pos, $end - $pos)` would be the entire token.
662 // In other words, $end will be the offset of the last relevant character
663 // in the stream + 1, or simply put: The offset of the first character
664 // of any next token in the stream.
666 // Handle string literals
667 if ( $ch === "'" ||
$ch === '"' ) {
668 // Search to the end of the string literal, skipping over backslash escapes
669 $search = $ch . '\\';
671 // Speculatively add 2 to the end so that if we see a backslash,
672 // the next iteration will start 2 characters further (one for the
673 // backslash, one for the escaped character).
674 // We'll correct this outside the loop.
675 $end +
= strcspn( $s, $search, $end ) +
2;
676 // If the last character in our search for a quote or a backslash
677 // matched a backslash and we haven't reached the end, keep searching..
678 } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
679 // Correction (1): Undo speculative add, keep only one (end of string literal)
681 if ( $end > $length ) {
682 // Correction (2): Loop wrongly assumed an end quote ended the search,
683 // but search ended because we've reached the end. Correct $end.
684 // TODO: This is invalid and should throw.
687 // We have to distinguish between regexp literals and division operators
688 // A division operator is only possible in certain states
689 } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
692 // Search until we find "/" (end of regexp), "\" (backslash escapes),
693 // or "[" (start of character classes).
695 // Speculatively add 2 to ensure next iteration skips
696 // over backslash and escaped character.
697 // We'll correct this outside the loop.
698 $end +
= strcspn( $s, '/[\\', $end ) +
2;
699 // If backslash escape, keep searching...
700 } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
701 // Correction (1): Undo speculative add, keep only one (end of regexp)
703 if ( $end > $length ) {
704 // Correction (2): Loop wrongly assumed end slash was seen
705 // String ended without end of regexp. Correct $end.
706 // TODO: This is invalid and should throw.
710 if ( $s[$end - 1] === '/' ) {
713 // (Implicit else), we must've found the start of a char class,
714 // skip until we find "]" (end of char class), or "\" (backslash escape)
716 // Speculatively add 2 for backslash escape.
717 // We'll subtract one outside the loop.
718 $end +
= strcspn( $s, ']\\', $end ) +
2;
719 // If backslash escape, keep searching...
720 } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
721 // Correction (1): Undo speculative add, keep only one (end of regexp)
723 if ( $end > $length ) {
724 // Correction (2): Loop wrongly assumed "]" was seen
725 // String ended without ending char class or regexp. Correct $end.
726 // TODO: This is invalid and should throw.
731 // Search past the regexp modifiers (gi)
732 while ( $end < $length && ctype_alpha( $s[$end] ) ) {
737 && ( $pos +
1 < $length ) && ( $s[$pos +
1] === 'x' ||
$s[$pos +
1] === 'X' )
739 // Hex numeric literal
741 $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
743 return self
::parseError(
746 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
752 ||
( $ch === '.' && $pos +
1 < $length && ctype_digit( $s[$pos +
1] ) )
754 $end +
= strspn( $s, '0123456789', $end );
755 $decimal = strspn( $s, '.', $end );
757 if ( $decimal > 2 ) {
758 return self
::parseError( $s, $end, 'The number has too many decimal points' );
760 $end +
= strspn( $s, '0123456789', $end +
1 ) +
$decimal;
762 $exponent = strspn( $s, 'eE', $end );
764 if ( $exponent > 1 ) {
765 return self
::parseError( $s, $end, 'Number with several E' );
769 // + sign is optional; - sign is required.
770 $end +
= strspn( $s, '-+', $end );
771 $len = strspn( $s, '0123456789', $end );
773 return self
::parseError(
776 'No decimal digits after e, how many zeroes should be added?'
781 } elseif ( isset( $opChars[$ch] ) ) {
782 // Punctuation character. Search for the longest matching operator.
785 && isset( $tokenTypes[substr( $s, $pos, $end - $pos +
1 )] )
790 // Identifier or reserved word. Search for the end by excluding whitespace and
792 $end +
= strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
795 // Now get the token type from our type array
796 $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
797 $type = $tokenTypes[$token] ?? self
::TYPE_LITERAL
;
799 if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
800 // This token triggers the semicolon insertion mechanism of javascript. While we
801 // could add the ; token here ourselves, keeping the newline has a few advantages.
803 $state = self
::STATEMENT
;
805 } elseif ( $lineLength +
$end - $pos > self
::$maxLineLength &&
806 !isset( $semicolon[$state][$type] ) && $type !== self
::TYPE_INCR_OP
) {
807 // This line would get too long if we added $token, so add a newline first.
808 // Only do this if it won't trigger semicolon insertion and if it won't
809 // put a postfix increment operator on its own line, which is illegal in js.
812 // Check, whether we have to separate the token from the last one with whitespace
813 } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
816 // Don't accidentally create ++, -- or // tokens
817 } elseif ( $last === $ch && ( $ch === '+' ||
$ch === '-' ||
$ch === '/' ) ) {
822 $type === self
::TYPE_LITERAL
823 && ( $token === 'true' ||
$token === 'false' )
824 && ( $state === self
::EXPRESSION ||
$state === self
::PROPERTY_EXPRESSION
)
827 $token = ( $token === 'true' ) ?
'!0' : '!1';
831 $lineLength +
= $end - $pos; // += strlen( $token )
832 $last = $s[$end - 1];
834 $newlineFound = false;
836 // Now that we have output our token, transition into the new state.
837 if ( isset( $push[$state][$type] ) && count( $stack ) < self
::STACK_LIMIT
) {
838 $stack[] = $push[$state][$type];
840 if ( $stack && isset( $pop[$state][$type] ) ) {
841 $state = array_pop( $stack );
842 } elseif ( isset( $model[$state][$type][self
::ACTION_GOTO
] ) ) {
843 $state = $model[$state][$type][self
::ACTION_GOTO
];
849 static function parseError( $fullJavascript, $position, $errorMsg ) {
850 // TODO: Handle the error: trigger_error, throw exception, return false...