From 8fea9c619d07deafee54763dfe20538406a9bf13 Mon Sep 17 00:00:00 2001
From: Bryan Davis
Date: Sun, 12 Oct 2014 11:59:00 -0600
Subject: [PATCH] FormatJson::stripComments

Add stripComments method that can be used to remove single line and
multiline comments from an otherwise valid JSON string.

Inspired by the comment removal code in redisJobRunnerService and
discussions on irc about the Extension registration RFC.

Change-Id: Ie743957bfbb7b1fca8cb78ad48c1efd953362fde
---
 includes/json/FormatJson.php                  | 107 ++++++++++++++++++-
 .../phpunit/includes/json/FormatJsonTest.php  |  60 +++++++++++
 2 files changed, 166 insertions(+), 1 deletion(-)

diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php
index f3e5c76ddf..74775b5b9d 100644
--- a/includes/json/FormatJson.php
+++ b/includes/json/FormatJson.php
@@ -70,6 +70,13 @@ class FormatJson {
 	 */
 	const TRY_FIXING = 0x200;
 
+	/**
+	 * If set, strip comments from input before parsing as JSON.
+	 *
+	 * @since 1.25
+	 */
+	const STRIP_COMMENTS = 0x400;
+
 	/**
 	 * Regex that matches whitespace inside empty arrays and objects.
 	 *
@@ -150,10 +157,14 @@ class FormatJson {
 	 * Unlike FormatJson::decode(), if $value represents null value, it will be properly decoded as valid.
 	 *
 	 * @param string $value The JSON string being decoded
-	 * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING
+	 * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING,
+	 *   STRIP_COMMENTS
 	 * @return Status If valid JSON, the value is available in $result->getValue()
 	 */
 	public static function parse( $value, $options = 0 ) {
+		if ( $options & self::STRIP_COMMENTS ) {
+			$value = self::stripComments( $value );
+		}
 		$assoc = ( $options & self::FORCE_ASSOC ) !== 0;
 		$result = json_decode( $value, $assoc );
 		$code = json_last_error();
@@ -347,4 +358,98 @@ class FormatJson {
 
 		return str_replace( "\x01", '\"', $buf );
 	}
+
+	/**
+	 * Remove multiline and single line comments from an otherwise valid JSON
+	 * input string. This can be used as a preprocessor to allow JSON
+	 * formatted configuration files to contain comments.
+	 *
+	 * Comment markers inside JSON string literals are left untouched. The
+	 * input is scanned byte by byte; no JSON validation is performed.
+	 *
+	 * @since 1.25
+	 * @param string $json
+	 * @return string JSON with comments removed
+	 */
+	public static function stripComments( $json ) {
+		// Ensure we have a string
+		$str = (string) $json;
+		$buffer = '';
+		$maxLen = strlen( $str );
+		// Offset of the first byte that has not been copied to $buffer yet
+		$mark = 0;
+		// Offset of the "/" that opened the comment currently being skipped
+		$commentStart = 0;
+
+		$inString = false;
+		$inComment = false;
+		$multiline = false;
+
+		for ( $idx = 0; $idx < $maxLen; $idx++ ) {
+			switch ( $str[$idx] ) {
+				case '\\':
+					if ( $inString ) {
+						// Skip the escaped character so that neither \" nor
+						// \\ can be mistaken for the end of the string
+						$idx++;
+					}
+					break;
+
+				case '"':
+					if ( !$inComment ) {
+						// Either started or ended a string
+						$inString = !$inString;
+					}
+					break;
+
+				case '/':
+					if ( $inString ) {
+						// Slashes inside strings are plain data
+						break;
+					}
+					$lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
+					if ( !$inComment ) {
+						if ( $lookAhead === '/' || $lookAhead === '*' ) {
+							// Transition into a comment
+							// Add characters seen to buffer
+							$buffer .= substr( $str, $mark, $idx - $mark );
+							$commentStart = $idx;
+							// Consume the look ahead character
+							$idx++;
+							// Track state
+							$inComment = true;
+							$multiline = $lookAhead === '*';
+						}
+					} elseif ( $multiline &&
+						$str[$idx - 1] === '*' &&
+						// The "*" must not be the one that opened the comment,
+						// otherwise "/*/" would count as a complete comment
+						$idx - 1 > $commentStart + 1
+					) {
+						// Found the end of the current comment
+						$mark = $idx + 1;
+						$inComment = false;
+						$multiline = false;
+					}
+					break;
+
+				case "\n":
+					if ( $inComment && !$multiline ) {
+						// Found the end of the current comment
+						$mark = $idx + 1;
+						$inComment = false;
+					}
+					break;
+			}
+		}
+		if ( $inComment ) {
+			// Comment ends with input
+			// Technically we should check to ensure that we aren't in
+			// a multiline comment that hasn't been properly ended, but this
+			// is a strip filter, not a validating parser.
+			$mark = $maxLen;
+		}
+		// Add final chunk to buffer before returning
+		return $buffer . substr( $str, $mark, $maxLen - $mark );
+	}
 }
diff --git a/tests/phpunit/includes/json/FormatJsonTest.php b/tests/phpunit/includes/json/FormatJsonTest.php
index af68ab03ad..456266f7be 100644
--- a/tests/phpunit/includes/json/FormatJsonTest.php
+++ b/tests/phpunit/includes/json/FormatJsonTest.php
@@ -222,6 +222,66 @@ class FormatJsonTest extends MediaWikiTestCase {
 		$this->assertFalse( $st->isOK() );
 	}
 
+	public function provideStripComments() {
+		return array(
+			array( '{"a":"b"}', '{"a":"b"}' ),
+			array( "{\"a\":\"b\"}\n", "{\"a\":\"b\"}\n" ),
+			array( '/*c*/{"c":"b"}', '{"c":"b"}' ),
+			array( '{"a":"c"}/*c*/', '{"a":"c"}' ),
+			array( '/*c//d*/{"c":"b"}', '{"c":"b"}' ),
+			array( '/*/c*/{"c":"b"}', '{"c":"b"}' ),
+			array( '{/*c*/"c":"b"}', '{"c":"b"}' ),
+			array( "/*\nc\r\n*/{\"c\":\"b\"}", '{"c":"b"}' ),
+			array( "//c\n{\"c\":\"b\"}", '{"c":"b"}' ),
+			array( "//c\r\n{\"c\":\"b\"}", '{"c":"b"}' ),
+			array( '{"a":"c"}//c', '{"a":"c"}' ),
+			array( "{\"a-c\"://c\n\"b\"}", '{"a-c":"b"}' ),
+			array( '{"/*a":"b"}', '{"/*a":"b"}' ),
+			array( '{"a":"//b"}', '{"a":"//b"}' ),
+			array( '{"a":"b/*c*/"}', '{"a":"b/*c*/"}' ),
+			array( "{\"\\\"/*a\":\"b\"}", "{\"\\\"/*a\":\"b\"}" ),
+			array( '{"a":"b\\\\"}/*c*/', '{"a":"b\\\\"}' ),
+			array( '', '' ),
+			array( '/*c', '' ),
+			array( '//c', '' ),
+			array( '"http://example.com"', '"http://example.com"' ),
+			array( "\0", "\0" ),
+			array( '"Blåbærsyltetøy"', '"Blåbærsyltetøy"' ),
+		);
+	}
+
+	/**
+	 * @covers FormatJson::stripComments
+	 * @dataProvider provideStripComments
+	 * @param string $json
+	 * @param string $expect
+	 */
+	public function testStripComments( $json, $expect ) {
+		$this->assertSame( $expect, FormatJson::stripComments( $json ) );
+	}
+
+	public function provideParseStripComments() {
+		return array(
+			array( '/* blah */true', true ),
+			array( "// blah \ntrue", true ),
+			array( '[ "a" , /* blah */ "b" ]', array( 'a', 'b' ) ),
+		);
+	}
+
+	/**
+	 * @covers FormatJson::parse
+	 * @covers FormatJson::stripComments
+	 * @dataProvider provideParseStripComments
+	 * @param string $json
+	 * @param mixed $expect
+	 */
+	public function testParseStripComments( $json, $expect ) {
+		$st = FormatJson::parse( $json, FormatJson::STRIP_COMMENTS );
+		$this->assertType( 'Status', $st );
+		$this->assertTrue( $st->isGood() );
+		$this->assertEquals( $expect, $st->getValue() );
+	}
+
 	/**
 	 * Generate a set of test cases for a particular combination of encoder options.
 	 *
-- 
2.20.1