From 430f653e72c9f1b5d168a0f50dcc9c4286a87d05 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Fri, 18 Feb 2011 07:36:11 +0000 Subject: [PATCH] * For readability, use /x in regexes. Removed the start and end slashes from the input to ParseMaster::add(), to avoid confusion over the lack of a /x modifier in that input. * Broke up several regexes and added comments, so that humans might be able to understand them. * Tested md5(minify(jquery.js)) as before. Checked speed, it's the same. --- includes/libs/JavaScriptDistiller.php | 75 ++++++++++++++++++++------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/includes/libs/JavaScriptDistiller.php b/includes/libs/JavaScriptDistiller.php index b377ac59d5..1c44e6bfa5 100644 --- a/includes/libs/JavaScriptDistiller.php +++ b/includes/libs/JavaScriptDistiller.php @@ -31,13 +31,13 @@ class JavaScriptDistiller { public static function stripHorizontalSpace( $script ) { $parser = self::createParser(); // Collapse horizontal whitespaces between variable names into a single space - $parser->add( '/(\b|\$)[ \t]+(\b|\$)/', '$2 $3' ); + $parser->add( '(\b|\$) [ \t]+ (\b|\$)', '$2 $3' ); // Collapse horizontal whitespaces between unary operators into a single space - $parser->add( '/([+\-])[ \t]+([+\-])/', '$2 $3' ); + $parser->add( '([+\-]) [ \t]+ ([+\-])', '$2 $3' ); // Remove all remaining un-protected horizontal whitespace - $parser->add( '/[ \t]+/'); + $parser->add( '[ \t]+'); // Collapse multiple vertical whitespaces with some horizontal spaces between them - $parser->add( '/[\r\n]+[ \t]*[\r\n]+/', "\n" ); + $parser->add( '[\r\n]+ [ \t]* [\r\n]+', "\n" ); // Execute and return return $parser->exec($script); } @@ -45,16 +45,16 @@ class JavaScriptDistiller { public static function stripVerticalSpace( $script ) { $parser = self::createParser(); // Collapse whitespaces between and after a ){ pair (function definitions) - $parser->add( '/\)\s+\{\s+/', '){' ); + $parser->add( '\) \s+ \{ \s+', '){' ); // Collapse whitespaces between and after a ({ pair (JSON argument) - $parser->add( '/\(\s+\{\s+/', '({' ); + $parser->add( '\( \s+ \{ \s+', '({' ); // Collapse whitespaces between a parenthesis and a period (call chaining) - $parser->add( '/\)\s+\./', ').'); + $parser->add( '\) \s+ \.', ').'); // Collapse vertical whitespaces which come directly after a semicolon or a comma - $parser->add( '/([;,])\s+/', '$2' ); + $parser->add( '( [;,] ) \s+', '$2' ); // Collapse whitespaces between multiple parenthesis/brackets of similar direction - $parser->add( '/([\)\}])\s+([\)\}])/', '$2$3' ); - $parser->add( '/([\(\{])\\s+([\(\{])/', '$2$3' ); + $parser->add( '( [\)\}] ) \s+ ( [\)\}] )', '$2$3' ); + $parser->add( '( [\(\{] ) \s+ ( [\(\{] )', '$2$3' ); return $parser->exec( $script ); } @@ -77,15 +77,52 @@ class JavaScriptDistiller { // Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline // strings correctly. This also armors multiline strings that don't have backslashes at the // end of the line (these are invalid), but that's fine because we're just armoring here. - $parser->add( '/\'([^\'\\\\]*(\\\\(.|[\r\n])[^\'\\\\]*)*)\'/', '$1' ); - $parser->add( '/"([^"\\\\]*(\\\\(.|[\r\n])[^"\\\\]*)*)"/', '$1' ); + + // Single quotes + $parser->add( + '\' (' . // start quote + '[^\'\\\\]*' . // a run of non-special characters + '(' . + '\\\\ ( . | [\r\n] )' . // a backslash followed by a character or line ending + '[^\'\\\\]*' . // a run of non-special characters + ')*' . // any number of the above + ') \'', // end quote + '$1' ); + + // Double quotes: same as above + $parser->add( '" ( [^"\\\\]* ( \\\\ ( . | [\r\n] ) [^"\\\\]* )* ) "', '$1' ); + // Protect regular expressions - $parser->add( '/[ \t]+((\/[^\r\n\*][^\/\r\n\\\\]*(\\\\.[^\/\r\n\\\\]*)*\/(i|g)*))/', '$1' ); - $parser->add( '/([^\w\$\/\'"*)\?:](\/[^\r\n\*][^\/\r\n\\\\]*(\\\\.[^\/\r\n\\\\]*)*\/(i|g)*))/', '$1' ); - // Remove comments - $parser->add( '/\/\*(.|[\r\n])*?\*\//' ); + // Regular expression with whitespace before it + $parser->add( + '[ \t]+ ( ( \/' . // whitespace then start slash + '[^\r\n\*]' . // not a comment-start or line ending + '[^\/\r\n\\\\]*' . // a sequence of non-special characters + '(' . + '\\\\.' . // an escaped dot + '[^\/\r\n\\\\]*' . // a sequence of non-special characters + ')*' . // any number of the above + '\/(i|g)*' . // pattern end, optional modifier + ') )', + '$1' ); + // Regular expression with an operator before it + $parser->add( + '( [^\w\$\/\'"*)\?:] (\/' . // certain kinds of punctuation and then start slash + '[^\r\n\*]' . // not a comment-start or line ending + '[^\/\r\n\\\\]*' . // a sequence of non-special characters + '(' . + '\\\\.' . // an escaped dot + '[^\/\r\n\\\\]*' . // a sequence of non-special characters + ')*' . // any number of the above + '\/(i|g)*)' . // pattern end, optional modifier + ')', + '$1' ); + + // C-style comment: use non-greedy repetition to find the end + $parser->add( '\/ \* ( . | [\r\n] )*? \* \/' ); + // Preserve the newline after a C++-style comment -- bug 27046 - $parser->add( '/\/\/[^\r\n]*([\r\n])/', '$2' ); + $parser->add( '\/ \/ [^\r\n]* ( [\r\n] )', '$2' ); return $parser; } } @@ -160,9 +197,9 @@ class ParseMaster { // simulate the _patterns.toSTring of Dean $regexp = '/'; foreach ($this->_patterns as $reg) { - $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|'; + $regexp .= '(' . $reg[self::EXPRESSION] . ')|'; } - $regexp = substr($regexp, 0, -1) . '/S'; + $regexp = substr($regexp, 0, -1) . '/Sx'; $regexp .= ($this->ignoreCase) ? 'i' : ''; $string = $this->_escape($string, $this->escapeChar); -- 2.20.1