<?php
-
/**
* JSMinPlus version 1.4
*
*
* Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
*
+ * @file
*/
/* ***** BEGIN LICENSE BLOCK *****
break;
default:
- // FIXME: add support for unicode and unicode escape sequence \uHHHH
- if (preg_match('/^[$\w]+/', $input, $match))
+ // Fast path for identifiers: word chars followed by whitespace or various other tokens.
+ // Note we don't need to exclude digits in the first char, as they've already been found
+ // above.
+ if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
{
- $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
+ // Character classes per ECMA-262 edition 5.1 section 7.6
+ // Per spec, must accept Unicode 3.0, *may* accept later versions.
+ // We'll take whatever PCRE understands, which should be more recent.
+ $identifierStartChars = "\\p{L}\\p{Nl}" . # UnicodeLetter
+ "\$" .
+ "_";
+ $identifierPartChars = $identifierStartChars .
+ "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
+ "\\p{Nd}" . # UnicodeDigit
+ "\\p{Pc}"; # UnicodeConnectorPunctuation
+ $unicodeEscape = "\\\\u[0-9A-F-a-f]{4}";
+ $identifierRegex = "/^" .
+ "(?:[$identifierStartChars]|$unicodeEscape)" .
+ "(?:[$identifierPartChars]|$unicodeEscape)*" .
+ "/uS";
+ if (preg_match($identifierRegex, $input, $match))
+ {
+ if (strpos($match[0], '\\') !== false) {
+ // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
+ // the original chars, but only within the boundaries of the identifier.
+ $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
+ array(__CLASS__, 'unicodeEscapeCallback'),
+ $match[0]);
+
+ // Since our original regex didn't de-escape the originals, we need to check for validity again.
+ // No need to worry about token boundaries, as anything outside the identifier is illegal!
+ if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
+ throw $this->newSyntaxError('Illegal token');
+ }
+
+ // Per spec it _ought_ to work to use these escapes for keywords words as well...
+ // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
+ // that don't match the keyword.
+ if (in_array($decoded, $this->keywords)) {
+ throw $this->newSyntaxError('Illegal token');
+ }
+
+ // TODO: save the decoded form for output?
+ }
+ }
+ else
+ throw $this->newSyntaxError('Illegal token');
}
- else
- throw $this->newSyntaxError('Illegal token');
+ $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
}
}
{
return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
}
+
+ public static function unicodeEscapeCallback($m)
+ {
+ return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
+ }
}
class JSToken