From 951103eeb2d619c7204819fbb31ef6c9bddd447a Mon Sep 17 00:00:00 2001 From: Trevor Parscal Date: Thu, 20 Jan 2011 21:57:01 +0000 Subject: [PATCH] Resolved bug 26791 by replacing JSMin with a new library called JavaScriptDistiller, which is an improved version of the minification bits from JavaScriptPacker, an LGPL library. Good news - it's 2x faster than our optimized JSMin anyways, and more configurable to boot. --- includes/AutoLoader.php | 3 +- includes/DefaultSettings.php | 2 + includes/libs/JSMin.php | 283 --------------------- includes/libs/JavaScriptDistiller.php | 75 ++++++ includes/libs/ParseMaster.php | 214 ++++++++++++++++ includes/resourceloader/ResourceLoader.php | 8 +- 6 files changed, 299 insertions(+), 286 deletions(-) delete mode 100644 includes/libs/JSMin.php create mode 100644 includes/libs/JavaScriptDistiller.php create mode 100644 includes/libs/ParseMaster.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 884478ca64..a02f681fb4 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -138,7 +138,7 @@ $wgAutoloadLocalClasses = array( 'IndexPager' => 'includes/Pager.php', 'Interwiki' => 'includes/Interwiki.php', 'IP' => 'includes/IP.php', - 'JSMin' => 'includes/libs/JSMin.php', + 'JavaScriptDistiller' => 'includes/libs/JavaScriptDistiller.php', 'LCStore_DB' => 'includes/LocalisationCache.php', 'LCStore_CDB' => 'includes/LocalisationCache.php', 'LCStore_Null' => 'includes/LocalisationCache.php', @@ -180,6 +180,7 @@ $wgAutoloadLocalClasses = array( 'PageHistory' => 'includes/HistoryPage.php', 'PageHistoryPager' => 'includes/HistoryPage.php', 'Pager' => 'includes/Pager.php', + 'ParseMaster' => 'includes/libs/ParseMaster.php', 'PasswordError' => 'includes/User.php', 'PatrolLog' => 'includes/PatrolLog.php', 'PhpHttpRequest' => 'includes/HttpFunctions.php', diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 893be8fbcd..90db83939e 100644 --- a/includes/DefaultSettings.php +++ 
b/includes/DefaultSettings.php @@ -2448,6 +2448,8 @@ $wgResourceLoaderDebug = false; */ $wgResourceLoaderUseESI = false; +$wgResourceLoaderMinifyJSVerticalSpace = false; + /** @} */ # End of resource loader settings } diff --git a/includes/libs/JSMin.php b/includes/libs/JSMin.php deleted file mode 100644 index 84ea757052..0000000000 --- a/includes/libs/JSMin.php +++ /dev/null @@ -1,283 +0,0 @@ - - * @copyright 2002 Douglas Crockford (jsmin.c) - * @copyright 2008 Ryan Grove (PHP port) - * @license http://opensource.org/licenses/mit-license.php MIT License - * @version 1.1.1 (2008-03-02) - * @link http://github.com/rgrove/jsmin-php/ - */ - -class JSMin { - const ORD_LF = 10; - const ORD_SPACE = 32; - - // Action constants - const OUTPUT = 1; - const DELETE_A = 2; - const DELETE_B = 3; - - /** Current character */ - protected $a = ''; - - /** Next character */ - protected $b = ''; - - protected $input = ''; - protected $inputIndex = 0; - protected $inputLength = 0; - protected $lookAhead = null; - protected $output = ''; - - // -- Public Static Methods -------------------------------------------------- - - public static function minify( $js ) { - $jsmin = new self( $js ); - $ret = $jsmin->min(); - return $ret; - } - - // -- Public Instance Methods ------------------------------------------------ - - public function __construct( $input ) { - // Fix line endings - $this->input = str_replace( "\r\n", "\n", $input ); - // Replace tabs and other control characters (except LF) with spaces - $this->input = preg_replace( '/[\x00-\x09\x0b-\x1f]/', ' ', $this->input ); - $this->inputLength = strlen( $this->input ); - } - - // -- Protected Instance Methods --------------------------------------------- - - /** - * Do something! What you do is determined by the argument: - * - self::OUTPUT Output A. Copy B to A. Get the next B. - * - self::DELETE_A Copy B to A. Get the next B. (Delete A). - * - self::DELETE_B Get the next B. (Delete B). 
- * action treats a string as a single character. Wow! - * action recognizes a regular expression if it is preceded by ( or , or =. - */ - protected function action( $d ) { - switch( $d ) { - case self::OUTPUT: - $this->output .= $this->a; - - case self::DELETE_A: - $this->a = $this->b; - - if ( $this->a === "'" || $this->a === '"' ) { - $interestingChars = $this->a . "\\\n"; - $this->output .= $this->a; - for ( ; ; ) { - $runLength = strcspn( $this->input, $interestingChars, $this->inputIndex ); - $this->output .= substr( $this->input, $this->inputIndex, $runLength ); - $this->inputIndex += $runLength; - $c = $this->get(); - - if ( $c === $this->b ) { - break; - } - - if ( $c === "\n" || $c === null ) { - throw new JSMinException( 'Unterminated string literal.' ); - } - - if ( $c === '\\' ) { - $this->output .= $c . $this->get(); - } - } - } - - case self::DELETE_B: - $this->b = $this->next(); - - if ( $this->b === '/' && ( - $this->a === '(' || $this->a === ',' || $this->a === '=' || - $this->a === ':' || $this->a === '[' || $this->a === '!' || - $this->a === '&' || $this->a === '|' || $this->a === '?' ) ) { - - $this->output .= $this->a . $this->b; - - for ( ; ; ) { - $runLength = strcspn( $this->input, "/\\\n", $this->inputIndex ); - $this->output .= substr( $this->input, $this->inputIndex, $runLength ); - $this->inputIndex += $runLength; - $this->a = $this->get(); - - if ( $this->a === '/' ) { - break; - } elseif ( $this->a === '\\' ) { - $this->output .= $this->a; - $this->a = $this->get(); - } elseif ( $this->a === "\n" || $this->a === null ) { - throw new JSMinException( 'Unterminated regular expression ' . - 'literal.' ); - } - - $this->output .= $this->a; - } - - $this->b = $this->next(); - } - } - } - - /** - * Return the next character from the input. Watch out for lookahead. If - * the character is a control character, translate it to a space or - * linefeed. 
- */ - protected function get() { - if ( $this->inputIndex < $this->inputLength ) { - return $this->input[$this->inputIndex++]; - } else { - return null; - } - } - - /** - * Return true if the character is a letter, digit, underscore, - * dollar sign, or non-ASCII character. - */ - protected function isAlphaNum( $c ) { - return ord( $c ) > 126 || $c === '\\' || preg_match( '/^[\w\$]$/', $c ) === 1; - } - - /** - * Copy the input to the output, deleting the characters which are - * insignificant to JavaScript. Comments will be removed. Tabs will be - * replaced with spaces. Carriage returns will be replaced with linefeeds. - * Most spaces and linefeeds will be removed. - */ - protected function min() { - $this->a = "\n"; - $this->action( self::DELETE_B ); - - while ( $this->a !== null ) { - switch ( $this->a ) { - case ' ': - if ( $this->isAlphaNum( $this->b ) ) { - $this->action( self::OUTPUT ); - } else { - $this->action( self::DELETE_A ); - } - break; - - case "\n": - switch ( $this->b ) { - case ' ': - $this->action( self::DELETE_B ); - break; - - default: - $this->action( self::OUTPUT ); - } - break; - - default: - switch ( $this->b ) { - case ' ': - if ( $this->isAlphaNum( $this->a ) ) { - $this->action( self::OUTPUT ); - break; - } - - $this->action( self::DELETE_B ); - break; - default: - $this->action( self::OUTPUT ); - break; - } - } - } - - // Remove initial line break - if ( $this->output[0] !== "\n" ) { - throw new JSMinException( 'Unexpected lack of line break.' ); - } - if ( $this->output === "\n" ) { - return ''; - } else { - return substr( $this->output, 1 ); - } - } - - /** - * Get the next character, excluding comments. 
- */ - protected function next() { - if ( $this->inputIndex >= $this->inputLength ) { - return null; - } - $c = $this->input[$this->inputIndex++]; - - if ( $this->inputIndex >= $this->inputLength ) { - return $c; - } - - if ( $c === '/' ) { - switch( $this->input[$this->inputIndex] ) { - case '/': - $this->inputIndex += strcspn( $this->input, "\n", $this->inputIndex ) + 1; - return "\n"; - case '*': - $endPos = strpos( $this->input, '*/', $this->inputIndex + 1 ); - if ( $endPos === false ) { - throw new JSMinException( 'Unterminated comment.' ); - } - $numLines = substr_count( $this->input, "\n", $this->inputIndex, - $endPos - $this->inputIndex ); - $this->inputIndex = $endPos + 2; - if ( $numLines ) { - return str_repeat( "\n", $numLines ); - } else { - return ' '; - } - default: - return $c; - } - } - - return $c; - } -} - -// -- Exceptions --------------------------------------------------------------- -class JSMinException extends Exception {} diff --git a/includes/libs/JavaScriptDistiller.php b/includes/libs/JavaScriptDistiller.php new file mode 100644 index 0000000000..53b4d7012f --- /dev/null +++ b/includes/libs/JavaScriptDistiller.php @@ -0,0 +1,75 @@ +escapeChar = chr( 1 ); + // Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline + // strings correctly. This also armors multiline strings that don't have backslashes at the + // end of the line (these are invalid), but that's fine because we're just armoring here. 
+ $parser->add('/\'[^\']*\'/', '$1' ); + $parser->add('/"[^"]*"/', '$1' ); + // Remove comments + $parser->add('/\\/\\/[^\v]*[\v]/', ' '); + $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' '); + // Protect regular expressions + $parser->add('/\\h+(\\/[^\\/\\v\\*][^\\/\\v]*\\/g?i?)/', '$2'); // IGNORE + $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\v\\*][^\\/\\v]*\\/g?i?/', '$1'); + // Remove: ;;; doSomething(); + $parser->add('/;;;[^\\v]+[\\v]/'); + // Remove redundant semi-colons + $parser->add('/\\(;;\\)/', '$1'); // protect for (;;) loops + $parser->add('/;+\\h*([};])/', '$2'); + // Apply all rules defined up to this point + $script = $parser->exec($script); + // If requested, make some vertical whitespace collapsing as well + if ( $collapseVertical ) { + // Collapse whitespaces between and after a ){ pair (function definitions) + $parser->add('/\\)\\s+\\{\\s+/', '){'); + // Collapse whitespaces between and after a ({ pair (JSON argument) + $parser->add('/\\(\\s+\\{\\s+/', '({'); + // Collapse whitespaces between a parenthesis and a period (call chaining) + $parser->add('/\\)\\s+\\./', ').'); + // Collapse vertical whitespaces which come directly after a semicolon or a comma + $parser->add('/([;,])\\s+/', '$2'); + // Collapse whitespaces between multiple parenthesis/brackets of similar direction + $parser->add('/([\\)\\}])\\s+([\\)\\}])/', '$2$3'); + $parser->add('/([\\(\\{])\\s+([\\(\\{])/', '$2$3'); + } + // Collapse horizontal whitespaces between variable names into a single space + $parser->add('/(\\b|\\x24)\\h+(\\b|\\x24)/', '$2 $3'); + // Collapse horizontal whitespaces between unary operators into a single space + $parser->add('/([+\\-])\\h+([+\\-])/', '$2 $3'); + // Collapse all remaining un-protected horizontal whitespace + $parser->add('/\\h+/', ''); + // Collapse multiple vertical whitespaces with some horizontal spaces between them + $parser->add('/\\v+\\h*\\v*/', "\n"); + + // Done + return $parser->exec($script); + + } +} diff --git 
a/includes/libs/ParseMaster.php b/includes/libs/ParseMaster.php new file mode 100644 index 0000000000..a95600ecbc --- /dev/null +++ b/includes/libs/ParseMaster.php @@ -0,0 +1,214 @@ +GROUPS, $this->_internalEscape((string)$expression), $out); + + // treat only strings $replacement + if (is_string($replacement)) { + // does the pattern deal with sub-expressions? + if (preg_match($this->SUB_REPLACE, $replacement)) { + // a simple lookup? (e.g. "$2") + if (preg_match($this->INDEXED, $replacement)) { + // store the index (used for fast retrieval of matched strings) + $replacement = (int)(substr($replacement, 1)) - 1; + } else { // a complicated lookup (e.g. "Hello $2 $1") + // build a function to do the lookup + $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement)) + ? '"' : "'"; + $replacement = array( + 'fn' => '_backReferences', + 'data' => array( + 'replacement' => $replacement, + 'length' => $length, + 'quote' => $quote + ) + ); + } + } + } + // pass the modified arguments + if (!empty($expression)) $this->_add($expression, $replacement, $length); + else $this->_add('/^$/', $replacement, $length); + } + + public function exec($string) { + // execute the global replacement + $this->_escaped = array(); + + // simulate the _patterns.toString of Dean + $regexp = '/'; + foreach ($this->_patterns as $reg) { + $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|'; + } + $regexp = substr($regexp, 0, -1) . '/'; + $regexp .= ($this->ignoreCase) ? 
'i' : ''; + + $string = $this->_escape($string, $this->escapeChar); + $string = preg_replace_callback( + $regexp, + array( + &$this, + '_replacement' + ), + $string + ); + $string = $this->_unescape($string, $this->escapeChar); + + return preg_replace($this->DELETED, '', $string); + } + + public function reset() { + // clear the patterns collection so that this object may be re-used + $this->_patterns = array(); + } + + // private + private $_escaped = array(); // escaped characters + private $_patterns = array(); // patterns stored by index + + // create and add a new pattern to the patterns collection + private function _add() { + $arguments = func_get_args(); + $this->_patterns[] = $arguments; + } + + // this is the global replace function (it's quite complicated) + private function _replacement($arguments) { + if (empty($arguments)) return ''; + + $i = 1; $j = 0; + // loop through the patterns + while (isset($this->_patterns[$j])) { + $pattern = $this->_patterns[$j++]; + // do we have a result? + if (isset($arguments[$i]) && ($arguments[$i] != '')) { + $replacement = $pattern[self::REPLACEMENT]; + + if (is_array($replacement) && isset($replacement['fn'])) { + + if (isset($replacement['data'])) $this->buffer = $replacement['data']; + return call_user_func(array(&$this, $replacement['fn']), $arguments, $i); + + } elseif (is_int($replacement)) { + return $arguments[$replacement + $i]; + + } + $delete = ($this->escapeChar == '' || + strpos($arguments[$i], $this->escapeChar) === false) + ? '' : "\x01" . $arguments[$i] . "\x01"; + return $delete . 
$replacement; + + // skip over references to sub-expressions + } else { + $i += $pattern[self::LENGTH]; + } + } + } + + private function _backReferences($match, $offset) { + $replacement = $this->buffer['replacement']; + $quote = $this->buffer['quote']; + $i = $this->buffer['length']; + while ($i) { + $replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement); + } + return $replacement; + } + + private function _replace_name($match, $offset){ + $length = strlen($match[$offset + 2]); + $start = $length - max($length - strlen($match[$offset + 3]), 0); + return substr($match[$offset + 1], $start, $length) . $match[$offset + 4]; + } + + private function _replace_encoded($match, $offset) { + return $this->buffer[$match[$offset]]; + } + + + // php : we cannot pass additional data to preg_replace_callback, + // and we cannot use &$this in create_function, so let's go to lower level + private $buffer; + + // encode escaped characters + private function _escape($string, $escapeChar) { + if ($escapeChar) { + $this->buffer = $escapeChar; + return preg_replace_callback( + '/\\' . $escapeChar . '(.)' .'/', + array(&$this, '_escapeBis'), + $string + ); + + } else { + return $string; + } + } + private function _escapeBis($match) { + $this->_escaped[] = $match[1]; + return $this->buffer; + } + + // decode escaped characters + private function _unescape($string, $escapeChar) { + if ($escapeChar) { + $regexp = '/'.'\\'.$escapeChar.'/'; + $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0); + return preg_replace_callback + ( + $regexp, + array(&$this, '_unescapeBis'), + $string + ); + + } else { + return $string; + } + } + private function _unescapeBis() { + if (isset($this->_escaped[$this->buffer['i']]) + && $this->_escaped[$this->buffer['i']] != '') + { + $temp = $this->_escaped[$this->buffer['i']]; + } else { + $temp = ''; + } + $this->buffer['i']++; + return $this->buffer['escapeChar'] . 
$temp; + } + + private function _internalEscape($string) { + return preg_replace($this->ESCAPE, '', $string); + } +} diff --git a/includes/resourceloader/ResourceLoader.php b/includes/resourceloader/ResourceLoader.php index 11bfdaacba..ba5ae11b95 100644 --- a/includes/resourceloader/ResourceLoader.php +++ b/includes/resourceloader/ResourceLoader.php @@ -108,7 +108,7 @@ class ResourceLoader { * Runs JavaScript or CSS data through a filter, caching the filtered result for future calls. * * Available filters are: - * - minify-js \see JSMin::minify + * - minify-js \see JavaScriptDistiller::stripWhiteSpace * - minify-css \see CSSMin::minify * * If $data is empty, only contains whitespace or the filter was unknown, @@ -119,6 +119,8 @@ class ResourceLoader { * @return String: Filtered data, or a comment containing an error message */ protected function filter( $filter, $data ) { + global $wgResourceLoaderMinifyJSVerticalSpace; + wfProfileIn( __METHOD__ ); // For empty/whitespace-only data or for unknown filters, don't perform @@ -144,7 +146,9 @@ class ResourceLoader { try { switch ( $filter ) { case 'minify-js': - $result = JSMin::minify( $data ); + $result = JavaScriptDistiller::stripWhiteSpace( + $data, $wgResourceLoaderMinifyJSVerticalSpace + ); break; case 'minify-css': $result = CSSMin::minify( $data ); -- 2.20.1