From b6a5bb484d90c6705bd1b010efcc2c1c18be1f34 Mon Sep 17 00:00:00 2001 From: Kevin Israel Date: Tue, 8 Oct 2013 17:23:06 -0400 Subject: [PATCH] FormatJson: Remove whitespace from empty arrays and objects As noted in c370ad21d7f8, the pretty output can differ between Zend PHP and HHVM. This change adds some post-processing to make the output consistent across implementations and with JavaScript JSON.stringify() and Python json.dumps(); all whitespace between the opening and closing brackets/braces is removed. Change-Id: I490e0ff1fac3d6c3fb44ab127e432872c0301a9d --- includes/json/FormatJson.php | 20 ++++++++++++++++++- .../phpunit/includes/json/FormatJsonTest.php | 14 +++++++------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php index bc2fff10c5..d61165120e 100644 --- a/includes/json/FormatJson.php +++ b/includes/json/FormatJson.php @@ -55,6 +55,17 @@ class FormatJson { */ const ALL_OK = 3; + /** + * Regex that matches whitespace inside empty arrays and objects. + * + * This doesn't affect regular strings inside the JSON because those can't + * have a real line break (\n) in them; at this point they are already escaped + * as the string "\n", which this doesn't match. + * + * @private + */ + const WS_CLEANUP_REGEX = '/(?<=[\[{])\n\s*+(?=[\]}])/'; + /** * Characters problematic in JavaScript. * @@ -130,6 +141,12 @@ class FormatJson { if ( $json === false ) { return false; } + + if ( $pretty ) { + // Remove whitespace inside empty arrays/objects; different JSON encoders + // vary on this, and we want our output to be consistent across implementations. 
+ $json = preg_replace( self::WS_CLEANUP_REGEX, '', $json ); + } if ( $escaping & self::UTF8_OK ) { $json = str_replace( self::$badChars, self::$badCharsEscaped, $json ); } @@ -213,6 +230,7 @@ class FormatJson { $buf .= substr( $json, $i, $skip ); } } - return str_replace( "\x01", '\"', preg_replace( '/ +$/m', '', $buf ) ); + $buf = preg_replace( self::WS_CLEANUP_REGEX, '', $buf ); + return str_replace( "\x01", '\"', $buf ); } } diff --git a/tests/phpunit/includes/json/FormatJsonTest.php b/tests/phpunit/includes/json/FormatJsonTest.php index eb024aba2a..149be05b47 100644 --- a/tests/phpunit/includes/json/FormatJsonTest.php +++ b/tests/phpunit/includes/json/FormatJsonTest.php @@ -12,28 +12,30 @@ class FormatJsonTest extends MediaWikiTestCase { 123, 456, ), + // Nested json works without problems '"7":["8",{"9":"10"}]', + // Whitespace clean up doesn't touch strings that look alike + "{\n\t\"emptyObject\": {\n\t},\n\t\"emptyArray\": [ ]\n}", ), ); // 4 space indent, no trailing whitespace, no trailing linefeed $json = '{ - "emptyObject": { - }, - "emptyArray": [ - ], + "emptyObject": {}, + "emptyArray": [], "string": "foobar\\\\", "filledArray": [ [ 123, 456 ], - "\"7\":[\"8\",{\"9\":\"10\"}]" + "\"7\":[\"8\",{\"9\":\"10\"}]", + "{\n\t\"emptyObject\": {\n\t},\n\t\"emptyArray\": [ ]\n}" ] }'; $json = str_replace( "\r", '', $json ); // Windows compat - $this->assertSame( $json, str_replace("\n\n", "\n", FormatJson::encode( $obj, true ) )); + $this->assertSame( $json, FormatJson::encode( $obj, true ) ); } public static function provideEncodeDefault() { -- 2.20.1