<?php
-
/**
- * JSMinPlus version 1.3
+ * JSMinPlus version 1.4
*
* Minifies a javascript file using a javascript parser
*
* Usage: $minified = JSMinPlus::minify($script [, $filename])
*
* Versionlog (see also changelog.txt):
- * 17-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
+ * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
+ * reduce memory footprint by minifying by block-scope
+ * some small byte-saving and performance improvements
+ * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
* 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
* 12-04-2009 - some small bugfixes and performance improvements
* 09-04-2009 - initial open sourced version 1.0
*
* Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
*
+ * @file
*/
/* ***** BEGIN LICENSE BLOCK *****
* the Initial Developer. All Rights Reserved.
*
* Contributor(s): Tino Zijdel <crisp@tweakers.net>
- * PHP port, modifications and minifier routine are (C) 2009
+ * PHP port, modifications and minifier routine are (C) 2009-2011
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
define('JS_GROUP', 112);
define('JS_LIST', 113);
+define('JS_MINIFIED', 999);
+
define('DECLARED_FORM', 0);
define('EXPRESSED_FORM', 1);
define('STATEMENT_FORM', 2);
+/* Operators
+ *
+ * Every OP_* constant below holds the literal source spelling of the operator
+ * it names; these are the same strings listed in the tokenizer's $opTypeNames
+ * array. They are declared statically here rather than being generated at
+ * runtime in the tokenizer constructor.
+ *
+ * OP_UNARY_PLUS and OP_UNARY_MINUS are the exception: 'U+' and 'U-' are not
+ * part of the tokenizer's operator list, so they never come straight from the
+ * input. Presumably the parser assigns them to tell unary from binary plus and
+ * minus - confirm in JSParser.
+ */
+define('OP_SEMICOLON', ';');
+define('OP_COMMA', ',');
+define('OP_HOOK', '?');
+define('OP_COLON', ':');
+define('OP_OR', '||');
+define('OP_AND', '&&');
+define('OP_BITWISE_OR', '|');
+define('OP_BITWISE_XOR', '^');
+define('OP_BITWISE_AND', '&');
+define('OP_STRICT_EQ', '===');
+define('OP_EQ', '==');
+define('OP_ASSIGN', '=');
+define('OP_STRICT_NE', '!==');
+define('OP_NE', '!=');
+define('OP_LSH', '<<');
+define('OP_LE', '<=');
+define('OP_LT', '<');
+define('OP_URSH', '>>>');
+define('OP_RSH', '>>');
+define('OP_GE', '>=');
+define('OP_GT', '>');
+define('OP_INCREMENT', '++');
+define('OP_DECREMENT', '--');
+define('OP_PLUS', '+');
+define('OP_MINUS', '-');
+define('OP_MUL', '*');
+define('OP_DIV', '/');
+define('OP_MOD', '%');
+define('OP_NOT', '!');
+define('OP_BITWISE_NOT', '~');
+define('OP_DOT', '.');
+define('OP_LEFT_BRACKET', '[');
+define('OP_RIGHT_BRACKET', ']');
+define('OP_LEFT_CURLY', '{');
+define('OP_RIGHT_CURLY', '}');
+define('OP_LEFT_PAREN', '(');
+define('OP_RIGHT_PAREN', ')');
+define('OP_CONDCOMMENT_END', '@*/');
+
+define('OP_UNARY_PLUS', 'U+');
+define('OP_UNARY_MINUS', 'U-');
+
+/* Keywords
+ *
+ * Every KEYWORD_* constant holds the keyword's own spelling. The tokenizer
+ * uses the matched keyword text itself as the token type, so these constants
+ * can be used directly as case labels on a node's type, and a node's type can
+ * be emitted verbatim as output text (as done for throw and return).
+ */
+define('KEYWORD_BREAK', 'break');
+define('KEYWORD_CASE', 'case');
+define('KEYWORD_CATCH', 'catch');
+define('KEYWORD_CONST', 'const');
+define('KEYWORD_CONTINUE', 'continue');
+define('KEYWORD_DEBUGGER', 'debugger');
+define('KEYWORD_DEFAULT', 'default');
+define('KEYWORD_DELETE', 'delete');
+define('KEYWORD_DO', 'do');
+define('KEYWORD_ELSE', 'else');
+define('KEYWORD_ENUM', 'enum');
+define('KEYWORD_FALSE', 'false');
+define('KEYWORD_FINALLY', 'finally');
+define('KEYWORD_FOR', 'for');
+define('KEYWORD_FUNCTION', 'function');
+define('KEYWORD_IF', 'if');
+define('KEYWORD_IN', 'in');
+define('KEYWORD_INSTANCEOF', 'instanceof');
+define('KEYWORD_NEW', 'new');
+define('KEYWORD_NULL', 'null');
+define('KEYWORD_RETURN', 'return');
+define('KEYWORD_SWITCH', 'switch');
+define('KEYWORD_THIS', 'this');
+define('KEYWORD_THROW', 'throw');
+define('KEYWORD_TRUE', 'true');
+define('KEYWORD_TRY', 'try');
+define('KEYWORD_TYPEOF', 'typeof');
+define('KEYWORD_VAR', 'var');
+define('KEYWORD_VOID', 'void');
+define('KEYWORD_WHILE', 'while');
+define('KEYWORD_WITH', 'with');
+
+
class JSMinPlus
{
private $parser;
private function __construct()
{
- $this->parser = new JSParser();
+ $this->parser = new JSParser($this); // give the parser a handle on this minifier so it can call parseTree() on each scope as soon as that scope is parsed
}
public static function minify($js, $filename='')
return false;
}
- private function parseTree($n, $noBlockGrouping = false)
+ public function parseTree($n, $noBlockGrouping = false)
{
$s = '';
switch ($n->type)
{
- case KEYWORD_FUNCTION:
- $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
- $params = $n->params;
- for ($i = 0, $j = count($params); $i < $j; $i++)
- $s .= ($i ? ',' : '') . $params[$i];
- $s .= '){' . $this->parseTree($n->body, true) . '}';
+ case JS_MINIFIED:
+ $s = $n->value;
break;
case JS_SCRIPT:
- // we do nothing with funDecls or varDecls
+ // we do nothing yet with funDecls or varDecls
$noBlockGrouping = true;
// FALL THROUGH
}
break;
+ case KEYWORD_FUNCTION:
+ $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
+ $params = $n->params;
+ for ($i = 0, $j = count($params); $i < $j; $i++)
+ $s .= ($i ? ',' : '') . $params[$i];
+ $s .= '){' . $this->parseTree($n->body, true) . '}';
+ break;
+
case KEYWORD_IF:
$s = 'if(' . $this->parseTree($n->condition) . ')';
$thenPart = $this->parseTree($n->thenPart);
break;
case KEYWORD_THROW:
- $s = 'throw ' . $this->parseTree($n->exception);
- break;
-
case KEYWORD_RETURN:
- $s = 'return';
+ $s = $n->type;
if ($n->value)
{
$t = $this->parseTree($n->value);
if (strlen($t))
{
- if ( $t[0] != '(' && $t[0] != '[' && $t[0] != '{' &&
- $t[0] != '"' && $t[0] != "'" && $t[0] != '/'
- )
+ if ($this->isWordChar($t[0]) || $t[0] == '\\')
$s .= ' ';
$s .= $t;
}
break;
+ case KEYWORD_IN:
+ case KEYWORD_INSTANCEOF:
+ $left = $this->parseTree($n->treeNodes[0]);
+ $right = $this->parseTree($n->treeNodes[1]);
+
+ $s = $left;
+
+ if ($this->isWordChar(substr($left, -1)))
+ $s .= ' ';
+
+ $s .= $n->type;
+
+ if ($this->isWordChar($right[0]) || $right[0] == '\\')
+ $s .= ' ';
+
+ $s .= $right;
+ break;
+
+ case KEYWORD_DELETE:
+ case KEYWORD_TYPEOF:
+ $right = $this->parseTree($n->treeNodes[0]);
+
+ $s = $n->type;
+
+ if ($this->isWordChar($right[0]) || $right[0] == '\\')
+ $s .= ' ';
+
+ $s .= $right;
+ break;
+
+ case KEYWORD_VOID:
+ $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
+ break;
+
case KEYWORD_DEBUGGER:
throw new Exception('NOT IMPLEMENTED: DEBUGGER');
break;
}
break;
- case KEYWORD_IN:
- $s = $this->parseTree($n->treeNodes[0]) . ' in ' . $this->parseTree($n->treeNodes[1]);
- break;
-
- case KEYWORD_INSTANCEOF:
- $s = $this->parseTree($n->treeNodes[0]) . ' instanceof ' . $this->parseTree($n->treeNodes[1]);
- break;
-
- case KEYWORD_DELETE:
- $s = 'delete ' . $this->parseTree($n->treeNodes[0]);
- break;
-
- case KEYWORD_VOID:
- $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
- break;
-
- case KEYWORD_TYPEOF:
- $s = 'typeof ' . $this->parseTree($n->treeNodes[0]);
- break;
-
case OP_NOT:
case OP_BITWISE_NOT:
case OP_UNARY_PLUS:
$s .= '}';
break;
+ case TOKEN_NUMBER:
+ $s = $n->value;
+ if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
+ $s = $m[1] . 'e' . strlen($m[2]);
+ break;
+
case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
- case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
+ case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
$s = $n->value;
break;
case JS_GROUP:
- $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+ if (in_array(
+ $n->treeNodes[0]->type,
+ array(
+ JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
+ TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
+ KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
+ )
+ ))
+ {
+ $s = $this->parseTree($n->treeNodes[0]);
+ }
+ else
+ {
+ $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+ }
break;
default:
{
return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
}
+
+	/**
+	 * Tells whether a character can be part of a JavaScript word (identifier,
+	 * keyword or numeric literal), i.e. whether a separating space is required
+	 * when it ends up directly next to a keyword in the minified output.
+	 *
+	 * Besides ASCII letters, digits, '_' and '$', every byte >= 0x80 counts as
+	 * a word character. The tokenizer accepts Unicode identifiers, and in
+	 * UTF-8 those start and end with non-ASCII bytes; without this,
+	 * "typeof é" would be emitted as "typeofé". Emitting a space where it is
+	 * not strictly required costs one byte but never changes the program.
+	 *
+	 * @param string $char first or last byte of an already minified fragment;
+	 *                     an empty string or false yields false
+	 * @return bool
+	 */
+	private function isWordChar($char)
+	{
+		if ($char == '_' || $char == '$' || ctype_alnum($char))
+			return true;
+
+		// lead or continuation byte of a multi-byte UTF-8 sequence
+		return is_string($char) && $char !== '' && ord($char[0]) >= 0x80;
+	}
}
class JSParser
{
private $t;
+ private $minifier;
private $opPrecedence = array(
';' => 0,
TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
);
- public function __construct()
+ public function __construct($minifier=null)
{
+ $this->minifier = $minifier; // optional: when set, each parsed scope is minified through $minifier->parseTree() and its subtree dropped; null leaves the tree intact
$this->t = new JSTokenizer();
}
$n->funDecls = $x->funDecls;
$n->varDecls = $x->varDecls;
+ // minify by scope
+ if ($this->minifier)
+ {
+ $n->value = $this->minifier->parseTree($n);
+
+ // clear tree from node to save memory
+ $n->treeNodes = null;
+ $n->funDecls = null;
+ $n->varDecls = null;
+
+ $n->type = JS_MINIFIED;
+ }
+
return $n;
}
case KEYWORD_THROW:
$n = new JSNode($this->t);
- $n->exception = $this->Expression($x);
+ $n->value = $this->Expression($x);
break;
case KEYWORD_RETURN:
);
private $opTypeNames = array(
- ';' => 'SEMICOLON',
- ',' => 'COMMA',
- '?' => 'HOOK',
- ':' => 'COLON',
- '||' => 'OR',
- '&&' => 'AND',
- '|' => 'BITWISE_OR',
- '^' => 'BITWISE_XOR',
- '&' => 'BITWISE_AND',
- '===' => 'STRICT_EQ',
- '==' => 'EQ',
- '=' => 'ASSIGN',
- '!==' => 'STRICT_NE',
- '!=' => 'NE',
- '<<' => 'LSH',
- '<=' => 'LE',
- '<' => 'LT',
- '>>>' => 'URSH',
- '>>' => 'RSH',
- '>=' => 'GE',
- '>' => 'GT',
- '++' => 'INCREMENT',
- '--' => 'DECREMENT',
- '+' => 'PLUS',
- '-' => 'MINUS',
- '*' => 'MUL',
- '/' => 'DIV',
- '%' => 'MOD',
- '!' => 'NOT',
- '~' => 'BITWISE_NOT',
- '.' => 'DOT',
- '[' => 'LEFT_BRACKET',
- ']' => 'RIGHT_BRACKET',
- '{' => 'LEFT_CURLY',
- '}' => 'RIGHT_CURLY',
- '(' => 'LEFT_PAREN',
- ')' => 'RIGHT_PAREN',
- '@*/' => 'CONDCOMMENT_END'
+ ';', ',', '?', ':', '||', '&&', '|', '^',
+ '&', '===', '==', '=', '!==', '!=', '<<', '<=',
+ '<', '>>>', '>>', '>=', '>', '++', '--', '+',
+ '-', '*', '/', '%', '!', '~', '.', '[',
+ ']', '{', '}', '(', ')', '@*/'
);
private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
public function __construct()
{
- $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', array_keys($this->opTypeNames))) . ')#';
-
- // this is quite a hidden yet convenient place to create the defines for operators and keywords
- foreach ($this->opTypeNames as $operand => $name)
- define('OP_' . $name, $operand);
-
- define('OP_UNARY_PLUS', 'U+');
- define('OP_UNARY_MINUS', 'U-');
-
- foreach ($this->keywords as $keyword)
- define('KEYWORD_' . strtoupper($keyword), $keyword);
+ $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#'; // anchored alternation of all operator spellings; alternatives are tried in list order, so $opTypeNames must keep longer operators ahead of their own prefixes (e.g. '===' before '==' before '=')
}
public function init($source, $filename = '', $lineno = 1)
{
switch ($input[0])
{
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (preg_match('/^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+/', $input, $match))
+ case '0':
+ // hexadecimal
+ if (($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
{
$tt = TOKEN_NUMBER;
+ break;
+ }
+ // FALL THROUGH
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ // should always match
+ preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
+ $tt = TOKEN_NUMBER;
+ break;
+
+ case "'":
+ if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
+ {
+ $tt = TOKEN_STRING;
}
- else if (preg_match('/^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/', $input, $match))
+ else
{
- // this should always match because of \d+
- $tt = TOKEN_NUMBER;
+ if ($chunksize)
+ return $this->get(null); // retry with a full chunk fetch
+
+ throw $this->newSyntaxError('Unterminated string literal');
}
break;
case '"':
- case "'":
- if (preg_match('/^"(?:\\\\(?:.|\r?\n)|[^\\\\"\r\n]+)*"|^\'(?:\\\\(?:.|\r?\n)|[^\\\\\'\r\n]+)*\'/', $input, $match))
+ if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
{
$tt = TOKEN_STRING;
}
break;
default:
- // FIXME: add support for unicode and unicode escape sequence \uHHHH
- if (preg_match('/^[$\w]+/', $input, $match))
+ // Fast path for identifiers: word chars followed by whitespace or various other tokens.
+ // Note we don't need to exclude digits in the first char, as they've already been found
+ // above.
+ if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
{
- $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
+ // Character classes per ECMA-262 edition 5.1 section 7.6
+ // Per spec, must accept Unicode 3.0, *may* accept later versions.
+ // We'll take whatever PCRE understands, which should be more recent.
+ $identifierStartChars = "\\p{L}\\p{Nl}" . # UnicodeLetter
+ "\$" .
+ "_";
+ $identifierPartChars = $identifierStartChars .
+ "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
+ "\\p{Nd}" . # UnicodeDigit
+ "\\p{Pc}"; # UnicodeConnectorPunctuation
+ $unicodeEscape = "\\\\u[0-9A-F-a-f]{4}";
+ $identifierRegex = "/^" .
+ "(?:[$identifierStartChars]|$unicodeEscape)" .
+ "(?:[$identifierPartChars]|$unicodeEscape)*" .
+ "/uS";
+ if (preg_match($identifierRegex, $input, $match))
+ {
+ if (strpos($match[0], '\\') !== false) {
+ // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
+ // the original chars, but only within the boundaries of the identifier.
+ $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
+ array(__CLASS__, 'unicodeEscapeCallback'),
+ $match[0]);
+
+ // Since our original regex didn't de-escape the originals, we need to check for validity again.
+ // No need to worry about token boundaries, as anything outside the identifier is illegal!
+ if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
+ throw $this->newSyntaxError('Illegal token');
+ }
+
+ // Per spec it _ought_ to work to use these escapes for keywords words as well...
+ // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
+ // that don't match the keyword.
+ if (in_array($decoded, $this->keywords)) {
+ throw $this->newSyntaxError('Illegal token');
+ }
+
+ // TODO: save the decoded form for output?
+ }
+ }
+ else
+ throw $this->newSyntaxError('Illegal token');
}
- else
- throw $this->newSyntaxError('Illegal token');
+ $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
}
}
{
return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
}
+
+	/**
+	 * preg_replace_callback() handler that converts a single \uHHHH escape
+	 * into the UTF-8 encoded character it stands for.
+	 *
+	 * @param array $m match array; $m[1] carries the four hexadecimal digits
+	 * @return string the numeric character reference &#xHHHH; after decoding
+	 */
+	public static function unicodeEscapeCallback($m)
+	{
+		$entity = '&#x' . $m[1] . ';';
+
+		return html_entity_decode($entity, ENT_QUOTES, 'UTF-8');
+	}
}
class JSToken
public $lineno;
public $assignOp;
}
-
-?>