From bb36c7b32acfcad09b9405fe2c3baa1a33deb1e8 Mon Sep 17 00:00:00 2001 From: Timo Tijhof Date: Wed, 3 May 2017 19:54:52 -0700 Subject: [PATCH] CSSMin: Support parenthesis and quotes in url references Previously they were often being cut short due to the url pattern ending at the first single quote, double quote or closing parenthesis regardless of which of those started the url match. Running benchmarkCSSMin.php before and after the change doesn't seem produce consistent improvement or regression. Repeated runs with count=100 with and without this change both have a median between 2.6ms and 2.9ms using PHP 5.6, and between 2.6ms and 2.8ms using HHVM 3.12. Bug: T60473 Change-Id: I6d6a077ad76588f3ed81b1901a26b7e56d2157ee --- includes/libs/CSSMin.php | 77 +++++++++++++- tests/phpunit/includes/libs/CSSMinTest.php | 118 +++++++++++++-------- 2 files changed, 146 insertions(+), 49 deletions(-) diff --git a/includes/libs/CSSMin.php b/includes/libs/CSSMin.php index bba07e263b..c504f3531c 100644 --- a/includes/libs/CSSMin.php +++ b/includes/libs/CSSMin.php @@ -38,7 +38,7 @@ class CSSMin { * Internet Explorer data URI length limit. See encodeImageAsDataURI(). */ const DATA_URI_SIZE_LIMIT = 32768; - const URL_REGEX = 'url\(\s*[\'"]?(?P[^\?\)\'"]*?)(?P\?[^\)\'"]*?|)[\'"]?\s*\)'; + const EMBED_REGEX = '\/\*\s*\@embed\s*\*\/'; const COMMENT_REGEX = '\/\*.*?\*\/'; @@ -72,8 +72,9 @@ class CSSMin { $files = []; $rFlags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER; - if ( preg_match_all( '/' . self::URL_REGEX . '/', $stripped, $matches, $rFlags ) ) { + if ( preg_match_all( '/' . self::getUrlRegex() . '/', $stripped, $matches, $rFlags ) ) { foreach ( $matches as $match ) { + self::processUrlMatch( $match, $rFlags ); $url = $match['file'][0]; // Skip fully-qualified and protocol-relative URLs and data URIs @@ -266,7 +267,7 @@ class CSSMin { // appears in the rule itself, e.g. in a quoted string. You are advised // not to use such characters in file names. We also match start/end of // the string to be consistent in edge-cases ('@import url(…)'). - $pattern = '/(?:^|[;{])\K[^;{}]*' . CSSMin::URL_REGEX . '[^;}]*(?=[;}]|$)/'; + $pattern = '/(?:^|[;{])\K[^;{}]*' . self::getUrlRegex() . '[^;}]*(?=[;}]|$)/'; $source = preg_replace_callback( $pattern, @@ -290,13 +291,14 @@ class CSSMin { // Build two versions of current rule: with remapped URLs // and with embedded data: URIs (where possible). - $pattern = '/(?P' . CSSMin::EMBED_REGEX . '\s*|)' . CSSMin::URL_REGEX . '/'; + $pattern = '/(?P' . CSSMin::EMBED_REGEX . '\s*|)' . self::getUrlRegex() . '/'; $ruleWithRemapped = preg_replace_callback( $pattern, function ( $match ) use ( $local, $remote ) { - $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false ); + self::processUrlMatch( $match ); + $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false ); return CSSMin::buildUrlValue( $remapped ); }, $rule @@ -309,6 +311,8 @@ class CSSMin { $ruleWithEmbedded = preg_replace_callback( $pattern, function ( $match ) use ( $embedAll, $local, $remote, &$mimeTypes ) { + self::processUrlMatch( $match ); + $embed = $embedAll || $match['embed']; $embedded = CSSMin::remapOne( $match['file'], @@ -385,6 +389,69 @@ class CSSMin { return false; } + private static function getUrlRegex() { + static $urlRegex; + if ( $urlRegex === null ) { + // Match these three variants separately to avoid broken urls when + // e.g. a double quoted url contains a parenthesis, or when a + // single quoted url contains a double quote, etc. + // Note: PCRE doesn't support multiple capture groups with the same name by default. + // - PCRE 6.7 introduced the "J" modifier (PCRE_INFO_JCHANGED for PCRE_DUPNAMES). + // https://secure.php.net/manual/en/reference.pcre.pattern.modifiers.php + // However this isn't useful since it just ignores all but the first one. + // Also, while the modifier was introduced in PCRE 6.7 (PHP 5.2+) it was + // not exposed to public preg_* functions until PHP 5.6.0. + // - PCRE 8.36 fixed this to work as expected (e.g. merge conceptually to + // only return the one matched in the part that actually matched). + // However MediaWiki supports 5.5.9, which has PCRE 8.32 + // Per https://secure.php.net/manual/en/pcre.installation.php: + // - PCRE 8.32 (PHP 5.5.0) + // - PCRE 8.34 (PHP 5.5.10, PHP 5.6.0) + // - PCRE 8.37 (PHP 5.5.26, PHP 5.6.9, PHP 7.0.0) + // Workaround by using different groups and merge via processUrlMatch(). + // - Using string concatenation for class constant or member assignments + // is only supported in PHP 5.6. Use a getter method for now. + $urlRegex = '(' . + // Unquoted url + 'url\(\s*(?P[^\'"][^\?\)]*?)(?P\?[^\)]*?|)\s*\)' . + // Single quoted url + '|url\(\s*\'(?P[^\?\']*?)(?P\?[^\']*?|)\'\s*\)' . + // Double quoted url + '|url\(\s*"(?P[^\?"]*?)(?P\?[^"]*?|)"\s*\)' . + ')'; + } + return $urlRegex; + } + + private static function processUrlMatch( array &$match, $flags = 0 ) { + if ( $flags & PREG_SET_ORDER ) { + // preg_match_all with PREG_SET_ORDER will return each group in each + // match array, and if it didn't match, instead of the sub array + // being an empty array it is `[ '', -1 ]`... + if ( isset( $match['file0'] ) && $match['file0'][1] !== -1 ) { + $match['file'] = $match['file0']; + $match['query'] = $match['query0']; + } elseif ( isset( $match['file1'] ) && $match['file1'][1] !== -1 ) { + $match['file'] = $match['file1']; + $match['query'] = $match['query1']; + } else { + $match['file'] = $match['file2']; + $match['query'] = $match['query2']; + } + } else { + if ( isset( $match['file0'] ) && $match['file0'] !== '' ) { + $match['file'] = $match['file0']; + $match['query'] = $match['query0']; + } elseif ( isset( $match['file1'] ) && $match['file1'] !== '' ) { + $match['file'] = $match['file1']; + $match['query'] = $match['query1']; + } else { + $match['file'] = $match['file2']; + $match['query'] = $match['query2']; + } + } + } + /** * Remap or embed a CSS URL path. * diff --git a/tests/phpunit/includes/libs/CSSMinTest.php b/tests/phpunit/includes/libs/CSSMinTest.php index 42f08cc0a0..d0121b15aa 100644 --- a/tests/phpunit/includes/libs/CSSMinTest.php +++ b/tests/phpunit/includes/libs/CSSMinTest.php @@ -147,6 +147,45 @@ class CSSMinTest extends MediaWikiTestCase { ]; } + public static function provideIsRemoteUrl() { + return [ + [ true, 'http://localhost/w/red.gif?123' ], + [ true, 'https://example.org/x.png' ], + [ true, '//example.org/x.y.z/image.png' ], + [ true, '//localhost/styles.css?query=yes' ], + [ true, '' ], + [ false, 'x.gif' ], + [ false, '/x.gif' ], + [ false, './x.gif' ], + [ false, '../x.gif' ], + ]; + } + + /** + * @dataProvider provideIsRemoteUrl + * @cover CSSMin::isRemoteUrl + */ + public function testIsRemoteUrl( $expect, $url ) { + $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect ); + } + + public static function provideIsLocalUrls() { + return [ + [ false, 'x.gif' ], + [ true, '/x.gif' ], + [ false, './x.gif' ], + [ false, '../x.gif' ], + ]; + } + + /** + * @dataProvider provideIsLocalUrls + * @cover CSSMin::isLocalUrl + */ + public function testIsLocalUrl( $expect, $url ) { + $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect ); + } + /** * This tests funky parameters to CSSMin::remap. testRemapRemapping tests * the basic functionality. @@ -211,45 +250,6 @@ class CSSMinTest extends MediaWikiTestCase { $this->assertEquals( $expectedOutput, $realOutput, "CSSMin::remap: $message" ); } - public static function provideIsRemoteUrl() { - return [ - [ true, 'http://localhost/w/red.gif?123' ], - [ true, 'https://example.org/x.png' ], - [ true, '//example.org/x.y.z/image.png' ], - [ true, '//localhost/styles.css?query=yes' ], - [ true, '' ], - [ false, 'x.gif' ], - [ false, '/x.gif' ], - [ false, './x.gif' ], - [ false, '../x.gif' ], - ]; - } - - /** - * @dataProvider provideIsRemoteUrl - * @cover CSSMin::isRemoteUrl - */ - public function testIsRemoteUrl( $expect, $url ) { - $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect ); - } - - public static function provideIsLocalUrls() { - return [ - [ false, 'x.gif' ], - [ true, '/x.gif' ], - [ false, './x.gif' ], - [ false, '../x.gif' ], - ]; - } - - /** - * @dataProvider provideIsLocalUrls - * @cover CSSMin::isLocalUrl - */ - public function testIsLocalUrl( $expect, $url ) { - $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect ); - } - public static function provideRemapRemappingCases() { // red.gif and green.gif are one-pixel 35-byte GIFs. // large.png is a 35K PNG that should be non-embeddable. @@ -307,8 +307,8 @@ class CSSMinTest extends MediaWikiTestCase { ], [ 'Remote URL (unnecessary quotes not preserved)', - 'foo { background: url("http://example.org/w/foo.png"); }', - 'foo { background: url(http://example.org/w/foo.png); }', + 'foo { background: url("http://example.org/w/unnecessary-quotes.png"); }', + 'foo { background: url(http://example.org/w/unnecessary-quotes.png); }', ], [ 'Embedded file', @@ -410,9 +410,39 @@ class CSSMinTest extends MediaWikiTestCase { '@import url(http://doc.example.org/styles.css)', ], [ - '@import rule to URL (should we remap this?)', - '@import url(//localhost/styles.css?query=yes)', - '@import url(//localhost/styles.css?query=yes)', + '@import rule to local file (should we remap this?)', + '@import url(/styles.css)', + '@import url(http://doc.example.org/styles.css)', + ], + [ + '@import rule to URL', + '@import url(//localhost/styles.css?query=val)', + '@import url(//localhost/styles.css?query=val)', + ], + [ + 'Background URL (double quotes)', + 'foo { background: url("//localhost/styles.css?quoted=double") }', + 'foo { background: url(//localhost/styles.css?quoted=double) }', + ], + [ + 'Background URL (single quotes)', + 'foo { background: url(\'//localhost/styles.css?quoted=single\') }', + 'foo { background: url(//localhost/styles.css?quoted=single) }', + ], + [ + 'Background URL (containing parentheses; T60473)', + 'foo { background: url("//localhost/styles.css?query=(parens)") }', + 'foo { background: url("//localhost/styles.css?query=(parens)") }', + ], + [ + 'Background URL (double quoted, containing single quotes; T60473)', + 'foo { background: url("//localhost/styles.css?quote=\'") }', + 'foo { background: url("//localhost/styles.css?quote=\'") }', + ], + [ + 'Background URL (single quoted, containing double quotes; T60473)', + 'foo { background: url(\'//localhost/styles.css?quote="\') }', + 'foo { background: url("//localhost/styles.css?quote=\"") }', ], [ 'Simple case with comments before url', -- 2.20.1