(bug 27528) Incorporate Paul Copperman's minifier
authorRoan Kattouw <catrope@users.mediawiki.org>
Mon, 14 Mar 2011 11:44:33 +0000 (11:44 +0000)
committerRoan Kattouw <catrope@users.mediawiki.org>
Mon, 14 Mar 2011 11:44:33 +0000 (11:44 +0000)
includes/AutoLoader.php
includes/libs/JavaScriptMinifier.php [new file with mode: 0644]
includes/resourceloader/ResourceLoader.php
tests/phpunit/includes/libs/JavaScriptMinifierTest.php [new file with mode: 0644]

index 16393bc..6542992 100644 (file)
@@ -135,6 +135,7 @@ $wgAutoloadLocalClasses = array(
        'Interwiki' => 'includes/Interwiki.php',
        'IP' => 'includes/IP.php',
        'JavaScriptDistiller' => 'includes/libs/JavaScriptDistiller.php',
+       'JavaScriptMinifier' => 'includes/libs/JavaScriptMinifier.php',
        'LCStore_DB' => 'includes/LocalisationCache.php',
        'LCStore_CDB' => 'includes/LocalisationCache.php',
        'LCStore_Null' => 'includes/LocalisationCache.php',
diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php
new file mode 100644 (file)
index 0000000..4c7256d
--- /dev/null
@@ -0,0 +1,530 @@
+<?php
+/**
+ * JavaScript Minifier
+ *
+ * This class is meant to safely minify javascript code, while leaving syntactically correct
+ * programs intact. Other libraries, such as JSMin require a certain coding style to work
+ * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
+ * slow, because they construct a complete parse tree before outputting the code minified.
+ * So this class is meant to allow arbitrary (but syntactically correct) input, while being
+ * fast enough to be used for on-the-fly minifying.
+ *
+ * Author: Paul Copperman <paul.copperman@gmail.com>
+ * License: choose any of Apache, MIT, GPL, LGPL
+ */
+
+class JavaScriptMinifier {
+
+       /* Class constants */
+       /* Parsing states.
+        * The state machine is only necessary to decide whether to parse a slash as division
+        * operator or as regexp literal.
+        * States are named after the next expected item. We only distinguish states when the
+        * distinction is relevant for our purpose.
+        */
+       const STATEMENT                = 0;
+       const CONDITION                = 1;
+       const PROPERTY_ASSIGNMENT      = 2;
+       const EXPRESSION               = 3;
+       const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
+       const EXPRESSION_OP            = 5;
+       const EXPRESSION_FUNC          = 6;
+       const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
+       const EXPRESSION_TERNARY_OP    = 8;
+       const EXPRESSION_TERNARY_FUNC  = 9;
+       const PAREN_EXPRESSION         = 10; // expression which is not on the top level
+       const PAREN_EXPRESSION_OP      = 11;
+       const PAREN_EXPRESSION_FUNC    = 12;
+       const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
+       const PROPERTY_EXPRESSION_OP   = 14;
+       const PROPERTY_EXPRESSION_FUNC = 15;
+
+       /* Token types */
+       const TYPE_UN_OP       = 1; // unary operators
+       const TYPE_INCR_OP     = 2; // ++ and --
+       const TYPE_BIN_OP      = 3; // binary operators
+       const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
+       const TYPE_HOOK        = 5; // ?
+       const TYPE_COLON       = 6; // :
+       const TYPE_COMMA       = 7; // ,
+       const TYPE_SEMICOLON   = 8; // ;
+       const TYPE_BRACE_OPEN  = 9; // {
+       const TYPE_BRACE_CLOSE = 10; // }
+       const TYPE_PAREN_OPEN  = 11; // ( and [
+       const TYPE_PAREN_CLOSE = 12; // ) and ]
+       const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
+       const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
+       const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
+       const TYPE_FUNC        = 16; // keywords: function
+       const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens
+       
+       // Sanity limit to avoid excessive memory usage
+       const STACK_LIMIT = 1000;
+
+       /* Static functions */
+
+       /**
+        * Returns minified JavaScript code.
+        *
+        * @param $s String JavaScript code to minify
+        * @return String Minified code
+        */
+       public static function minify( $s ) {
+               // First we declare a few tables that contain our parsing rules
+
+               // $opChars : characters, which can be combined without whitespace in between them
+               $opChars = array(
+                       '!' => true,
+                       '"' => true,
+                       '%' => true,
+                       '&' => true,
+                       "'" => true,
+                       '(' => true,
+                       ')' => true,
+                       '*' => true,
+                       '+' => true,
+                       ',' => true,
+                       '-' => true,
+                       '.' => true,
+                       '/' => true,
+                       ':' => true,
+                       ';' => true,
+                       '<' => true,
+                       '=' => true,
+                       '>' => true,
+                       '?' => true,
+                       '[' => true,
+                       ']' => true,
+                       '^' => true,
+                       '{' => true,
+                       '|' => true,
+                       '}' => true,
+                       '~' => true
+               );
+
+               // $tokenTypes : maps keywords and operators to their corresponding token type
+               $tokenTypes = array(
+                       '!'          => self::TYPE_UN_OP,
+                       '~'          => self::TYPE_UN_OP,
+                       'delete'     => self::TYPE_UN_OP,
+                       'new'        => self::TYPE_UN_OP,
+                       'typeof'     => self::TYPE_UN_OP,
+                       'void'       => self::TYPE_UN_OP,
+                       '++'         => self::TYPE_INCR_OP,
+                       '--'         => self::TYPE_INCR_OP,
+                       '!='         => self::TYPE_BIN_OP,
+                       '!=='        => self::TYPE_BIN_OP,
+                       '%'          => self::TYPE_BIN_OP,
+                       '%='         => self::TYPE_BIN_OP,
+                       '&'          => self::TYPE_BIN_OP,
+                       '&&'         => self::TYPE_BIN_OP,
+                       '&='         => self::TYPE_BIN_OP,
+                       '*'          => self::TYPE_BIN_OP,
+                       '*='         => self::TYPE_BIN_OP,
+                       '+='         => self::TYPE_BIN_OP,
+                       '-='         => self::TYPE_BIN_OP,
+                       '.'          => self::TYPE_BIN_OP,
+                       '/'          => self::TYPE_BIN_OP,
+                       '/='         => self::TYPE_BIN_OP,
+                       '<'          => self::TYPE_BIN_OP,
+                       '<<'         => self::TYPE_BIN_OP,
+                       '<<='        => self::TYPE_BIN_OP,
+                       '<='         => self::TYPE_BIN_OP,
+                       '='          => self::TYPE_BIN_OP,
+                       '=='         => self::TYPE_BIN_OP,
+                       '==='        => self::TYPE_BIN_OP,
+                       '>'          => self::TYPE_BIN_OP,
+                       '>='         => self::TYPE_BIN_OP,
+                       '>>'         => self::TYPE_BIN_OP,
+                       '>>='        => self::TYPE_BIN_OP,
+                       '>>>'        => self::TYPE_BIN_OP,
+                       '>>>='       => self::TYPE_BIN_OP,
+                       '^'          => self::TYPE_BIN_OP,
+                       '^='         => self::TYPE_BIN_OP,
+                       '|'          => self::TYPE_BIN_OP,
+                       '|='         => self::TYPE_BIN_OP,
+                       '||'         => self::TYPE_BIN_OP,
+                       'in'         => self::TYPE_BIN_OP,
+                       'instanceof' => self::TYPE_BIN_OP,
+                       '+'          => self::TYPE_ADD_OP,
+                       '-'          => self::TYPE_ADD_OP,
+                       '?'          => self::TYPE_HOOK,
+                       ':'          => self::TYPE_COLON,
+                       ','          => self::TYPE_COMMA,
+                       ';'          => self::TYPE_SEMICOLON,
+                       '{'          => self::TYPE_BRACE_OPEN,
+                       '}'          => self::TYPE_BRACE_CLOSE,
+                       '('          => self::TYPE_PAREN_OPEN,
+                       '['          => self::TYPE_PAREN_OPEN,
+                       ')'          => self::TYPE_PAREN_CLOSE,
+                       ']'          => self::TYPE_PAREN_CLOSE,
+                       'break'      => self::TYPE_RETURN,
+                       'continue'   => self::TYPE_RETURN,
+                       'return'     => self::TYPE_RETURN,
+                       'throw'      => self::TYPE_RETURN,
+                       'catch'      => self::TYPE_IF,
+                       'for'        => self::TYPE_IF,
+                       'if'         => self::TYPE_IF,
+                       'switch'     => self::TYPE_IF,
+                       'while'      => self::TYPE_IF,
+                       'with'       => self::TYPE_IF,
+                       'case'       => self::TYPE_DO,
+                       'do'         => self::TYPE_DO,
+                       'else'       => self::TYPE_DO,
+                       'finally'    => self::TYPE_DO,
+                       'try'        => self::TYPE_DO,
+                       'var'        => self::TYPE_DO,
+                       'function'   => self::TYPE_FUNC
+               );
+
+               // $goto : This is the main table for our state machine. For every state/token pair
+               //         the following state is defined. When no rule exists for a given pair,
+               //         the state is left unchanged.
+               $goto = array(
+                       self::STATEMENT => array(
+                               self::TYPE_UN_OP      => self::EXPRESSION,
+                               self::TYPE_INCR_OP    => self::EXPRESSION,
+                               self::TYPE_ADD_OP     => self::EXPRESSION,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
+                               self::TYPE_IF         => self::CONDITION,
+                               self::TYPE_FUNC       => self::CONDITION,
+                               self::TYPE_LITERAL    => self::EXPRESSION_OP
+                       ),
+                       self::CONDITION => array(
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+                       ),
+                       self::PROPERTY_ASSIGNMENT => array(
+                               self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
+                               self::TYPE_BRACE_OPEN => self::STATEMENT
+                       ),
+                       self::EXPRESSION => array(
+                               self::TYPE_SEMICOLON  => self::STATEMENT,
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_FUNC       => self::EXPRESSION_FUNC,
+                               self::TYPE_LITERAL    => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_NO_NL => array(
+                               self::TYPE_SEMICOLON  => self::STATEMENT,
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_FUNC       => self::EXPRESSION_FUNC,
+                               self::TYPE_LITERAL    => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_OP => array(
+                               self::TYPE_BIN_OP     => self::EXPRESSION,
+                               self::TYPE_ADD_OP     => self::EXPRESSION,
+                               self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
+                               self::TYPE_COLON      => self::STATEMENT,
+                               self::TYPE_COMMA      => self::EXPRESSION,
+                               self::TYPE_SEMICOLON  => self::STATEMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+                       ),
+                       self::EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::STATEMENT
+                       ),
+                       self::EXPRESSION_TERNARY => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
+                               self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
+                       ),
+                       self::EXPRESSION_TERNARY_OP => array(
+                               self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
+                               self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
+                               self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
+                               self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+                       ),
+                       self::EXPRESSION_TERNARY_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::STATEMENT
+                       ),
+                       self::PAREN_EXPRESSION => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
+                               self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
+                       ),
+                       self::PAREN_EXPRESSION_OP => array(
+                               self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
+                               self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
+                               self::TYPE_HOOK       => self::PAREN_EXPRESSION,
+                               self::TYPE_COLON      => self::PAREN_EXPRESSION,
+                               self::TYPE_COMMA      => self::PAREN_EXPRESSION,
+                               self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+                       ),
+                       self::PAREN_EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::STATEMENT
+                       ),
+                       self::PROPERTY_EXPRESSION => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
+                               self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
+                               self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
+                       ),
+                       self::PROPERTY_EXPRESSION_OP => array(
+                               self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
+                               self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
+                               self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
+                               self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
+                       ),
+                       self::PROPERTY_EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::STATEMENT
+                       )
+               );
+
+               // $push : This table contains the rules for when to push a state onto the stack.
+               //         The pushed state is the state to return to when the corresponding
+               //         closing token is found
+               $push = array(
+                       self::STATEMENT => array(
+                               self::TYPE_BRACE_OPEN => self::STATEMENT,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+                       ),
+                       self::CONDITION => array(
+                               self::TYPE_PAREN_OPEN => self::STATEMENT
+                       ),
+                       self::PROPERTY_ASSIGNMENT => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
+                       ),
+                       self::EXPRESSION => array(
+                               self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_NO_NL => array(
+                               self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_OP => array(
+                               self::TYPE_HOOK       => self::EXPRESSION,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
+                       ),
+                       self::EXPRESSION_TERNARY => array(
+                               self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+                       ),
+                       self::EXPRESSION_TERNARY_OP => array(
+                               self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
+                               self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
+                       ),
+                       self::EXPRESSION_TERNARY_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
+                       ),
+                       self::PAREN_EXPRESSION => array(
+                               self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+                       ),
+                       self::PAREN_EXPRESSION_OP => array(
+                               self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
+                       ),
+                       self::PAREN_EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
+                       ),
+                       self::PROPERTY_EXPRESSION => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
+                               self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+                       ),
+                       self::PROPERTY_EXPRESSION_OP => array(
+                               self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
+                       ),
+                       self::PROPERTY_EXPRESSION_FUNC => array(
+                               self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
+                       )
+               );
+
+               // $pop : Rules for when to pop a state from the stack
+               $pop = array(
+                       self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
+                       self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
+                       self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
+                       self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
+                       self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
+               );
+
+               // $semicolon : Rules for when a semicolon insertion is appropriate
+               $semicolon = array(
+                       self::EXPRESSION_NO_NL => array(
+                               self::TYPE_UN_OP      => true,
+                               self::TYPE_INCR_OP    => true,
+                               self::TYPE_ADD_OP     => true,
+                               self::TYPE_BRACE_OPEN => true,
+                               self::TYPE_PAREN_OPEN => true,
+                               self::TYPE_RETURN     => true,
+                               self::TYPE_IF         => true,
+                               self::TYPE_DO         => true,
+                               self::TYPE_FUNC       => true,
+                               self::TYPE_LITERAL    => true
+                       ),
+                       self::EXPRESSION_OP => array(
+                               self::TYPE_UN_OP      => true,
+                               self::TYPE_INCR_OP    => true,
+                               self::TYPE_BRACE_OPEN => true,
+                               self::TYPE_RETURN     => true,
+                               self::TYPE_IF         => true,
+                               self::TYPE_DO         => true,
+                               self::TYPE_FUNC       => true,
+                               self::TYPE_LITERAL    => true
+                       )
+               );
+
+               // $divStates : Contains all states that can be followed by a division operator
+               $divStates = array(
+                       self::EXPRESSION_OP          => true,
+                       self::EXPRESSION_TERNARY_OP  => true,
+                       self::PAREN_EXPRESSION_OP    => true,
+                       self::PROPERTY_EXPRESSION_OP => true
+               );
+
+               // Here's where the minifying takes place: Loop through the input, looking for tokens
+               // and output them to $out, taking actions to the above defined rules when appropriate.
+               $out = '';
+               $pos = 0;
+               $length = strlen( $s );
+               $newlineFound = true;
+               $state = self::STATEMENT;
+               $stack = array();
+               $last = ';'; // Pretend that we have seen a semicolon yet
+               while( $pos < $length ) {
+                       // First, skip over any whitespace and multiline comments, recording whether we
+                       // found any newline character
+                       $skip = strspn( $s, " \t\n\r\v\f", $pos );
+                       if( !$skip ) {
+                               $ch = $s[$pos];
+                               if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
+                                       // Multiline comment. Search for the end token or EOT.
+                                       $end = strpos( $s, '*/', $pos + 2 );
+                                       $skip = $end === false ? $length - $pos : $end - $pos + 2;
+                               }
+                       }
+                       if( $skip ) {
+                               // The semicolon insertion mechanism needs to know whether there was a newline
+                               // between two tokens, so record it now.
+                               if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
+                                       $newlineFound = true;
+                               }
+                               $pos += $skip;
+                               continue;
+                       }
+                       // Handle C++-style comments and html comments, which are treated as single line
+                       // comments by the browser, regardless of whether the end tag is on the same line.
+                       // Handle --> the same way, but only if it's at the beginning of the line
+                       if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
+                               || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
+                               || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
+                       ) {
+                               $pos += strcspn( $s, "\r\n", $pos );
+                               continue;
+                       }
+
+                       // Find out which kind of token we're handling. $end will point past the end of it.
+                       $end = $pos + 1;
+                       // Handle string literals
+                       if( $ch === "'" || $ch === '"' ) {
+                               // Search to the end of the string literal, skipping over backslash escapes
+                               $search = $ch . '\\';
+                               do{
+                                       $end += strcspn( $s, $search, $end ) + 2;
+                               } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+                               $end--;
+                       // We have to distinguish between regexp literals and division operators
+                       // A division operator is only possible in certain states
+                       } elseif( $ch === '/' && !isset( $divStates[$state] ) ) {
+                               // Regexp literal, search to the end, skipping over backslash escapes and
+                               // character classes
+                               for( ; ; ) {
+                                       do{
+                                               $end += strcspn( $s, '/[\\', $end ) + 2;
+                                       } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+                                       $end--;
+                                       if( $end - 1 >= $length || $s[$end - 1] === '/' ) {
+                                               break;
+                                       }
+                                       do{
+                                               $end += strcspn( $s, ']\\', $end ) + 2;
+                                       } while( $end - 2 < $length && $s[$end - 2] === '\\' );
+                                       $end--;
+                               };
+                               // Search past the regexp modifiers (gi)
+                               while( $end < $length && ctype_alpha( $s[$end] ) ) {
+                                       $end++;
+                               }
+                       } elseif(
+                               ctype_digit( $ch )
+                               || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
+                       ) {
+                               // Numeric literal. Search for the end of it, but don't care about [+-]exponent
+                               // at the end, as the results of "numeric [+-] numeric" and "numeric" are
+                               // identical to our state machine.
+                               $end += strspn( $s, '0123456789ABCDEFabcdefXx.', $end );
+                               while( $s[$end - 1] === '.' ) {
+                                       // Special case: When a numeric ends with a dot, we have to check the 
+                                       // literal for proper syntax
+                                       $decimal = strspn( $s, '0123456789', $pos, $end - $pos - 1 );
+                                       if( $decimal === $end - $pos - 1 ) {
+                                               break;
+                                       } else {
+                                               $end--;
+                                       }
+                               }
+                       } elseif( isset( $opChars[$ch] ) ) {
+                               // Punctuation character. Search for the longest matching operator.
+                               while(
+                                       $end < $length
+                                       && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
+                               ) {
+                                       $end++;
+                               }
+                       } else {
+                               // Identifier or reserved word. Search for the end by excluding whitespace and
+                               // punctuation.
+                               $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\f\v\r", $end );
+                       }
+
+                       // Now get the token type from our type array
+                       $token = substr( $s, $pos, $end - $pos );
+                       $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;
+
+                       if( $newlineFound && isset( $semicolon[$state][$type] ) ) {
+                               // This token triggers the semicolon insertion mechanism of javascript. While we
+                               // could add the ; token here ourselves, keeping the newline has a few advantages.
+                               $out .= "\n";
+                               $state = self::STATEMENT;
+                       } elseif( false /* Put your newline condition here */ ) {
+                               $out .= "\n";
+                       // Check, whether we have to separate the token from the last one with whitespace
+                       } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
+                               $out .= ' ';
+                       // Don't accidentally create ++, -- or // tokens
+                       } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
+                               $out .= ' ';
+                       }
+                       
+                       $out .= $token;
+                       $last = $s[$end - 1];
+                       $pos = $end;
+                       $newlineFound = false;
+
+                       // Now that we have output our token, transition into the new state.
+                       if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
+                               $stack[] = $push[$state][$type];
+                       }
+                       if( $stack && isset( $pop[$state][$type] ) ) {
+                               $state = array_pop( $stack );
+                       } elseif( isset( $goto[$state][$type] ) ) {
+                               $state = $goto[$state][$type];
+                       }
+               }
+               return $out;
+       }
+}
index 4c3d978..e376b6a 100644 (file)
@@ -29,7 +29,7 @@
 class ResourceLoader {
 
        /* Protected Static Members */
-       protected static $filterCacheVersion = 1;
+       protected static $filterCacheVersion = 2;
 
        /** Array: List of module name/ResourceLoaderModule object pairs */
        protected $modules = array();
@@ -110,7 +110,7 @@ class ResourceLoader {
         * Runs JavaScript or CSS data through a filter, caching the filtered result for future calls.
         * 
         * Available filters are:
-        *  - minify-js \see JavaScriptDistiller::stripWhiteSpace
+        *  - minify-js \see JavaScriptMinifier::minify
         *  - minify-css \see CSSMin::minify
         * 
         * If $data is empty, only contains whitespace or the filter was unknown, 
@@ -121,8 +121,6 @@ class ResourceLoader {
         * @return String: Filtered data, or a comment containing an error message
         */
        protected function filter( $filter, $data ) {
-               global $wgResourceLoaderMinifyJSVerticalSpace;
-
                wfProfileIn( __METHOD__ );
 
                // For empty/whitespace-only data or for unknown filters, don't perform 
@@ -149,9 +147,7 @@ class ResourceLoader {
                try {
                        switch ( $filter ) {
                                case 'minify-js':
-                                       $result = JavaScriptDistiller::stripWhiteSpace(
-                                               $data, $wgResourceLoaderMinifyJSVerticalSpace
-                                       );
+                                       $result = JavaScriptMinifier::minify( $data );
                                        $result .= "\n\n/* cache key: $key */\n";
                                        break;
                                case 'minify-css':
diff --git a/tests/phpunit/includes/libs/JavaScriptMinifierTest.php b/tests/phpunit/includes/libs/JavaScriptMinifierTest.php
new file mode 100644 (file)
index 0000000..82de53d
--- /dev/null
@@ -0,0 +1,70 @@
+<?php
+
+class JavaScriptMinifierTest extends MediaWikiTestCase {
+
+       function provideCases() {
+               return array(
+                       // Basic tokens
+                       array( "\r\t\f \v\n\r", "" ),
+                       array( "/* Foo *\n*bar\n*/", "" ),
+                       array( "'  Foo  \\'  bar  \\\n  baz  \\'  quox  '  .", "'  Foo  \\'  bar  \\\n  baz  \\'  quox  '." ),
+                       array( '\"  Foo  \\"  bar  \\\n  baz  \\"  quox  "  .', '\"  Foo  \\"  bar  \\\n  baz  \\"  quox  ".' ),
+                       array( "// Foo b/ar baz", "" ),
+                       array( "/  Foo  \\/  bar  [  /  \\]  /  ]  baz  /  .", "/  Foo  \\/  bar  [  /  \\]  /  ]  baz  /." ),
+                       // HTML comments
+                       array( "<!-- Foo bar", "" ),
+                       array( "<!-- Foo --> bar", "" ),
+                       array( "--> Foo", "" ),
+                       array( "x --> y", "x-->y" ),
+                       // Semicolon insertion
+                       array( "return\nx;", "return\nx;" ),
+                       array( "throw\nx;", "throw\nx;" ),
+                       array( "continue\nx;", "continue\nx;" ),
+                       array( "break\nx;", "break\nx;" ),
+                       array( "var\nx;", "var x;" ),
+                       array( "x\ny;", "x\ny;" ),
+                       array( "x\n++y;", "x\n++y;" ),
+                       array( "x\n!y;", "x\n!y;" ),
+                       array( "x\n{y}", "x\n{y}" ),
+                       array( "x\n+y;", "x+y;" ),
+                       array( "x\n(y);", "x(y);" ),
+                       array( "5.\nx;", "5.\nx;" ),
+                       array( "0xFF.\nx;", "0xFF.x;" ),
+                       array( "5.3.\nx;", "5.3.x;" ),
+                       // Token separation
+                       array( "x  in  y", "x in y" ),
+                       array( "/x/g  in  y", "/x/g in y" ),
+                       array( "x  in  30", "x in 30" ),
+                       array( "x  +  ++  y", "x+ ++y" ),
+                       array( "x  /  /y/.exec(z)", "x/ /y/.exec(z)" ),
+                       // State machine
+                       array( "/  x/g", "/  x/g" ),
+                       array( "return/  x/g", "return/  x/g" ),
+                       array( "+/  x/g", "+/  x/g" ),
+                       array( "++/  x/g", "++/  x/g" ),
+                       array( "x/  x/g", "x/x/g" ),
+                       array( "(/  x/g)", "(/  x/g)" ),
+                       array( "if(/  x/g);", "if(/  x/g);" ),
+                       array( "(x/  x/g)", "(x/x/g)" ),
+                       array( "([/  x/g])", "([/  x/g])" ),
+                       array( "+x/  x/g", "+x/x/g" ),
+                       array( "{}/  x/g", "{}/  x/g" ),
+                       array( "+{}/  x/g", "+{}/x/g" ),
+                       array( "(x)/  x/g", "(x)/x/g" ),
+                       array( "if(x)/  x/g", "if(x)/  x/g" ),
+                       array( "for(x;x;{}/  x/g);", "for(x;x;{}/x/g);" ),
+                       array( "x;x;{}/  x/g", "x;x;{}/  x/g" ),
+                       array( "x:{}/  x/g", "x:{}/  x/g" ),
+                       array( "switch(x){case y?z:{}/  x/g:{}/  x/g;}", "switch(x){case y?z:{}/x/g:{}/  x/g;}" ),
+                       array( "function x(){}/  x/g", "function x(){}/  x/g" ),
+                       array( "+function x(){}/  x/g", "+function x(){}/x/g" )
+               );
+       }
+
+       /**
+        * @dataProvider provideCases
+        */
+       function testJavaScriptMinifierOutput( $code, $expectedOutput ) {
+               $this->assertEquals( $expectedOutput, JavaScriptMinifier::minify( $code ) );
+       }
+}