*
* @file
* @author Paul Copperman <paul.copperman@gmail.com>
- * @license Choose any of Apache, MIT, GPL, LGPL
+ * @license Apache-2.0
+ * @license MIT
+ * @license GPL-2.0-or-later
+ * @license LGPL-2.1-or-later
*/
/**
* slow, because they construct a complete parse tree before outputting the code minified.
* So this class is meant to allow arbitrary (but syntactically correct) input, while being
* fast enough to be used for on-the-fly minifying.
+ *
+ * This class was written with ECMA-262 Edition 3 in mind ("ECMAScript 3"). Parsing features
+ * new to ECMAScript 5 or later might not be supported. However, Edition 5.1 better reflects
+ * how JS engines actually behave, then and now, and its prose is simpler and more readable.
+ * As such, the code below refers to sections of the 5.1 specification.
+ *
+ * See <https://www.ecma-international.org/ecma-262/5.1/>.
*/
class JavaScriptMinifier {
const PROPERTY_EXPRESSION_FUNC = 15;
/* Token types */
- const TYPE_UN_OP = 1; // unary operators
- const TYPE_INCR_OP = 2; // ++ and --
- const TYPE_BIN_OP = 3; // binary operators
- const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
- const TYPE_HOOK = 5; // ?
- const TYPE_COLON = 6; // :
- const TYPE_COMMA = 7; // ,
- const TYPE_SEMICOLON = 8; // ;
- const TYPE_BRACE_OPEN = 9; // {
- const TYPE_BRACE_CLOSE = 10; // }
- const TYPE_PAREN_OPEN = 11; // ( and [
- const TYPE_PAREN_CLOSE = 12; // ) and ]
- const TYPE_RETURN = 13; // keywords: break, continue, return, throw
- const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
- const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
- const TYPE_FUNC = 16; // keywords: function
- const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens
+ const TYPE_UN_OP = 101; // unary operators
+ const TYPE_INCR_OP = 102; // ++ and --
+ const TYPE_BIN_OP = 103; // binary operators
+ const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
+ const TYPE_HOOK = 105; // ?
+ const TYPE_COLON = 106; // :
+ const TYPE_COMMA = 107; // ,
+ const TYPE_SEMICOLON = 108; // ;
+ const TYPE_BRACE_OPEN = 109; // {
+ const TYPE_BRACE_CLOSE = 110; // }
+ const TYPE_PAREN_OPEN = 111; // ( and [
+ const TYPE_PAREN_CLOSE = 112; // ) and ]
+ const TYPE_RETURN = 113; // keywords: break, continue, return, throw
+ const TYPE_IF = 114; // keywords: catch, for, with, switch, while, if
+ const TYPE_DO = 115; // keywords: case, var, finally, else, do, try
+ const TYPE_FUNC = 116; // keywords: function
+ const TYPE_LITERAL = 117; // all literals, identifiers and unrecognised tokens
// Sanity limit to avoid excessive memory usage
const STACK_LIMIT = 1000;
/**
- * NOTE: This isn't a strict maximum. Longer lines will be produced when
- * literals (e.g. quoted strings) longer than this are encountered
- * or when required to guard against semicolon insertion.
+ * Maximum line length
+ *
+ * This is not a strict maximum, but a guideline. Longer lines will be
+ * produced when literals (e.g. quoted strings) longer than this are
+ * encountered, or when required to guard against semicolon insertion.
+ *
+ * This is a private member (instead of constant) to allow tests to
+ * set it to 1, to verify ASI and line-breaking behaviour.
*/
- const MAX_LINE_LENGTH = 1000;
+ private static $maxLineLength = 1000;
/**
* Returns minified JavaScript code.
public static function minify( $s ) {
// First we declare a few tables that contain our parsing rules
- // $opChars : characters, which can be combined without whitespace in between them
+ // $opChars : Characters which can be combined without whitespace between them.
$opChars = [
- '!' => true,
- '"' => true,
- '%' => true,
- '&' => true,
- "'" => true,
+ // ECMAScript 5.1 § 7.7 Punctuators
+ // Unlike the spec, these are individual symbols, not sequences.
+ '{' => true,
+ '}' => true,
'(' => true,
')' => true,
- '*' => true,
- '+' => true,
- ',' => true,
- '-' => true,
+ '[' => true,
+ ']' => true,
'.' => true,
- '/' => true,
- ':' => true,
';' => true,
+ ',' => true,
'<' => true,
- '=' => true,
'>' => true,
- '?' => true,
- '[' => true,
- ']' => true,
- '^' => true,
- '{' => true,
+ '=' => true,
+ '!' => true,
+ '+' => true,
+ '-' => true,
+ '*' => true,
+ '%' => true,
+ '&' => true,
'|' => true,
- '}' => true,
- '~' => true
+ '^' => true,
+ '~' => true,
+ '?' => true,
+ ':' => true,
+ '/' => true,
+ // ECMAScript 5.1 § 7.8.4 String Literals
+ '"' => true,
+ "'" => true,
];
- // $tokenTypes : maps keywords and operators to their corresponding token type
+ // $tokenTypes : Map keywords and operators to their corresponding token type
$tokenTypes = [
- '!' => self::TYPE_UN_OP,
- '~' => self::TYPE_UN_OP,
- 'delete' => self::TYPE_UN_OP,
+ // ECMAScript 5.1 § 11.4 Unary Operators
+ // ECMAScript 5.1 § 11.6 Additive Operators
+ // UnaryExpression includes PostfixExpression, which includes 'new'.
'new' => self::TYPE_UN_OP,
- 'typeof' => self::TYPE_UN_OP,
+ 'delete' => self::TYPE_UN_OP,
'void' => self::TYPE_UN_OP,
+ 'typeof' => self::TYPE_UN_OP,
'++' => self::TYPE_INCR_OP,
'--' => self::TYPE_INCR_OP,
+ '+' => self::TYPE_ADD_OP,
+ '-' => self::TYPE_ADD_OP,
+ '~' => self::TYPE_UN_OP,
+ '!' => self::TYPE_UN_OP,
+ // ECMAScript 5.1 § 11.5 Multiplicative Operators
+ '*' => self::TYPE_BIN_OP,
+ '/' => self::TYPE_BIN_OP,
+ '%' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.7 Bitwise Shift Operators
+ '<<' => self::TYPE_BIN_OP,
+ '>>' => self::TYPE_BIN_OP,
+ '>>>' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.8 Relational Operators
+ '<' => self::TYPE_BIN_OP,
+ '>' => self::TYPE_BIN_OP,
+ '<=' => self::TYPE_BIN_OP,
+ '>=' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.9 Equality Operators
+ '==' => self::TYPE_BIN_OP,
'!=' => self::TYPE_BIN_OP,
+ '===' => self::TYPE_BIN_OP,
'!==' => self::TYPE_BIN_OP,
- '%' => self::TYPE_BIN_OP,
- '%=' => self::TYPE_BIN_OP,
+ 'instanceof' => self::TYPE_BIN_OP,
+ 'in' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.10 Binary Bitwise Operators
'&' => self::TYPE_BIN_OP,
+ '^' => self::TYPE_BIN_OP,
+ '|' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.11 Binary Logical Operators
'&&' => self::TYPE_BIN_OP,
- '&=' => self::TYPE_BIN_OP,
- '*' => self::TYPE_BIN_OP,
+ '||' => self::TYPE_BIN_OP,
+ // ECMAScript 5.1 § 11.12 Conditional Operator
+ // Also known as ternary.
+ '?' => self::TYPE_HOOK,
+ ':' => self::TYPE_COLON,
+ // ECMAScript 5.1 § 11.13 Assignment Operators
+ '=' => self::TYPE_BIN_OP,
'*=' => self::TYPE_BIN_OP,
+ '/=' => self::TYPE_BIN_OP,
+ '%=' => self::TYPE_BIN_OP,
'+=' => self::TYPE_BIN_OP,
'-=' => self::TYPE_BIN_OP,
- '.' => self::TYPE_BIN_OP,
- '/' => self::TYPE_BIN_OP,
- '/=' => self::TYPE_BIN_OP,
- '<' => self::TYPE_BIN_OP,
- '<<' => self::TYPE_BIN_OP,
'<<=' => self::TYPE_BIN_OP,
- '<=' => self::TYPE_BIN_OP,
- '=' => self::TYPE_BIN_OP,
- '==' => self::TYPE_BIN_OP,
- '===' => self::TYPE_BIN_OP,
- '>' => self::TYPE_BIN_OP,
- '>=' => self::TYPE_BIN_OP,
- '>>' => self::TYPE_BIN_OP,
'>>=' => self::TYPE_BIN_OP,
- '>>>' => self::TYPE_BIN_OP,
'>>>=' => self::TYPE_BIN_OP,
- '^' => self::TYPE_BIN_OP,
+ '&=' => self::TYPE_BIN_OP,
'^=' => self::TYPE_BIN_OP,
- '|' => self::TYPE_BIN_OP,
'|=' => self::TYPE_BIN_OP,
- '||' => self::TYPE_BIN_OP,
- 'in' => self::TYPE_BIN_OP,
- 'instanceof' => self::TYPE_BIN_OP,
- '+' => self::TYPE_ADD_OP,
- '-' => self::TYPE_ADD_OP,
- '?' => self::TYPE_HOOK,
- ':' => self::TYPE_COLON,
+ // ECMAScript 5.1 § 11.14 Comma Operator
',' => self::TYPE_COMMA,
- ';' => self::TYPE_SEMICOLON,
- '{' => self::TYPE_BRACE_OPEN,
- '}' => self::TYPE_BRACE_CLOSE,
- '(' => self::TYPE_PAREN_OPEN,
- '[' => self::TYPE_PAREN_OPEN,
- ')' => self::TYPE_PAREN_CLOSE,
- ']' => self::TYPE_PAREN_CLOSE,
- 'break' => self::TYPE_RETURN,
+
+ // The keywords that disallow LineTerminator before their
+ // (sometimes optional) Expression or Identifier.
+ //
+ // keyword ;
+ // keyword [no LineTerminator here] Identifier ;
+ // keyword [no LineTerminator here] Expression ;
+ //
+ // See also ECMAScript 5.1:
+ // - § 12.7 The continue Statement
+ // - § 12.8 The break Statement
+ // - § 12.9 The return Statement
+ // - § 12.13 The throw Statement
'continue' => self::TYPE_RETURN,
+ 'break' => self::TYPE_RETURN,
'return' => self::TYPE_RETURN,
'throw' => self::TYPE_RETURN,
+
+ // The keywords that require a parenthesised Expression or Identifier
+ // before the next Statement.
+ //
+ // keyword ( Expression ) Statement
+ // keyword ( Identifier ) Statement
+ //
+ // See also ECMAScript 5.1:
+ // - § 12.5 The if Statement
+ // - § 12.6 Iteration Statements (do, while, for)
+ // - § 12.10 The with Statement
+ // - § 12.11 The switch Statement
+ // - § 12.14 The try Statement (catch)
+ 'if' => self::TYPE_IF,
'catch' => self::TYPE_IF,
+ 'while' => self::TYPE_IF,
'for' => self::TYPE_IF,
- 'if' => self::TYPE_IF,
'switch' => self::TYPE_IF,
- 'while' => self::TYPE_IF,
'with' => self::TYPE_IF,
- 'case' => self::TYPE_DO,
- 'do' => self::TYPE_DO,
+
+ // The keywords followed by an Identifier, Statement,
+ // Expression, or Block.
+ //
+ // var Identifier
+ // else Statement
+ // do Statement
+ // case Expression
+ // try Block
+ // finally Block
+ //
+ // See also ECMAScript 5.1:
+ // - § 12.2 Variable Statement
+ // - § 12.5 The if Statement (else)
+ // - § 12.6 Iteration Statements (do, while, for)
+ // - § 12.11 The switch Statement (case)
+ // - § 12.14 The try Statement
+ 'var' => self::TYPE_DO,
'else' => self::TYPE_DO,
- 'finally' => self::TYPE_DO,
+ 'do' => self::TYPE_DO,
+ 'case' => self::TYPE_DO,
'try' => self::TYPE_DO,
- 'var' => self::TYPE_DO,
- 'function' => self::TYPE_FUNC
+ 'finally' => self::TYPE_DO,
+
+ // ECMAScript 5.1 § 13 Function Definition
+ 'function' => self::TYPE_FUNC,
+
+ // Can be one of:
+ // - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
+ // - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
+ '.' => self::TYPE_BIN_OP,
+
+ // Can be one of:
+ // - Block (ECMAScript 5.1 § 12.1 Block)
+ // - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
+ '{' => self::TYPE_BRACE_OPEN,
+ '}' => self::TYPE_BRACE_CLOSE,
+
+ // Can be one of:
+ // - Parenthesised Identifier or Expression after a
+ // TYPE_IF or TYPE_FUNC keyword.
+ // - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
+ // - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
+ '(' => self::TYPE_PAREN_OPEN,
+ ')' => self::TYPE_PAREN_CLOSE,
+
+ // Can be one of:
+ // - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
+ '[' => self::TYPE_PAREN_OPEN,
+ ']' => self::TYPE_PAREN_CLOSE,
+
+ // Can be one of:
+ // - End of any statement
+ // - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
+ ';' => self::TYPE_SEMICOLON,
];
// $goto : This is the main table for our state machine. For every state/token pair
self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
],
self::PROPERTY_EXPRESSION_OP => [
+ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
],
self::PROPERTY_EXPRESSION_FUNC => [
// Now get the token type from our type array
$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
- $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
+ $type = $tokenTypes[$token] ?? self::TYPE_LITERAL;
if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
// This token triggers the semicolon insertion mechanism of javascript. While we
$out .= "\n";
$state = self::STATEMENT;
$lineLength = 0;
- } elseif ( $lineLength + $end - $pos > self::MAX_LINE_LENGTH &&
+ } elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
// This line would get too long if we added $token, so add a newline first.
// Only do this if it won't trigger semicolon insertion and if it won't