Rewrite logstash key conflict warning from I6677dbf6
authorGergő Tisza <gtisza@wikimedia.org>
Wed, 23 Nov 2016 01:01:07 +0000 (01:01 +0000)
committerGergő Tisza <gtisza@wikimedia.org>
Tue, 29 Nov 2016 11:14:08 +0000 (11:14 +0000)
* do not warn if something is overwritten with an identical value
  (happens a lot with 'ip')
* move to LogstashFormatter so we can check for the value
* instead of spamming errors, just add a flag to the logstash data

Bug: T145133
Change-Id: I31caee865cd60c785126478ac75c9aefce78eaaf

autoload.php
includes/debug/logger/monolog/LogstashFormatter.php [new file with mode: 0644]
includes/debug/logger/monolog/WikiProcessor.php
tests/phpunit/includes/debug/logger/monolog/LogstashFormatterTest.php [new file with mode: 0644]

index 30ef985..f0bbe92 100644 (file)
@@ -875,6 +875,7 @@ $wgAutoloadLocalClasses = [
        'MediaWiki\\Logger\\Monolog\\LegacyFormatter' => __DIR__ . '/includes/debug/logger/monolog/LegacyFormatter.php',
        'MediaWiki\\Logger\\Monolog\\LegacyHandler' => __DIR__ . '/includes/debug/logger/monolog/LegacyHandler.php',
        'MediaWiki\\Logger\\Monolog\\LineFormatter' => __DIR__ . '/includes/debug/logger/monolog/LineFormatter.php',
+       'MediaWiki\\Logger\\Monolog\\LogstashFormatter' => __DIR__ . '/includes/debug/logger/monolog/LogstashFormatter.php',
        'MediaWiki\\Logger\\Monolog\\SyslogHandler' => __DIR__ . '/includes/debug/logger/monolog/SyslogHandler.php',
        'MediaWiki\\Logger\\Monolog\\WikiProcessor' => __DIR__ . '/includes/debug/logger/monolog/WikiProcessor.php',
        'MediaWiki\\Logger\\NullSpi' => __DIR__ . '/includes/debug/logger/NullSpi.php',
diff --git a/includes/debug/logger/monolog/LogstashFormatter.php b/includes/debug/logger/monolog/LogstashFormatter.php
new file mode 100644 (file)
index 0000000..553cbf6
--- /dev/null
@@ -0,0 +1,83 @@
+<?php
+
+namespace MediaWiki\Logger\Monolog;
+
+/**
+ * LogstashFormatter squashes the base message array and the context and extras subarrays into one.
+ * This can result in unfortunately named context fields overwriting other data (T145133).
+ * This class modifies the standard LogstashFormatter to rename such fields and flag the message.
+ *
+ * Compatible with Monolog 1.x only.
+ *
+ * @since 1.29
+ */
+class LogstashFormatter extends \Monolog\Formatter\LogstashFormatter {
+       /** @var array Keys which should not be used in log context */
+       protected $reservedKeys = [
+               // from LogstashFormatter
+               'message', 'channel', 'level', 'type',
+               // from WebProcessor
+               'url', 'ip', 'http_method', 'server', 'referrer',
+               // from WikiProcessor
+               'host', 'wiki', 'reqId', 'mwversion',
+               // from config magic
+               'normalized_message',
+       ];
+
+       /**
+        * Prevent key conflicts
+        * @param array $record
+        * @return array
+        */
+       protected function formatV0( array $record ) {
+               if ( $this->contextPrefix ) {
+                       return parent::formatV0( $record );
+               }
+
+               $context = !empty( $record['context'] ) ? $record['context'] : [];
+               $record['context'] = [];
+               $formatted = parent::formatV0( $record );
+
+               $formatted['@fields'] = $this->fixKeyConflicts( $formatted['@fields'], $context );
+               return $formatted;
+       }
+
+       /**
+        * Prevent key conflicts
+        * @param array $record
+        * @return array
+        */
+       protected function formatV1( array $record ) {
+               if ( $this->contextPrefix ) {
+                       return parent::formatV1( $record );
+               }
+
+               $context = !empty( $record['context'] ) ? $record['context'] : [];
+               $record['context'] = [];
+               $formatted = parent::formatV1( $record );
+
+               $formatted = $this->fixKeyConflicts( $formatted, $context );
+               return $formatted;
+       }
+
+       /**
+        * Check whether some context field would overwrite another message key. If so, rename
+        * and flag.
+        * @param array $fields Fields to be sent to logstash
+        * @param array $context Copy of the original $record['context']
+        * @return array Updated version of $fields
+        */
+       protected function fixKeyConflicts( array $fields, array $context ) {
+               foreach ( $context as $key => $val ) {
+                       if (
+                               in_array( $key, $this->reservedKeys, true ) &&
+                               isset( $fields[$key] ) && $fields[$key] !== $val
+                       ) {
+                               $fields['logstash_formatter_key_conflict'][] = $key;
+                               $key = 'c_' . $key;
+                       }
+                       $fields[$key] = $val;
+               }
+               return $fields;
+       }
+}
index 81e1e14..ad939a0 100644 (file)
@@ -29,17 +29,6 @@ namespace MediaWiki\Logger\Monolog;
  * @copyright © 2013 Bryan Davis and Wikimedia Foundation.
  */
 class WikiProcessor {
-       /** @var array Keys which should not be used in log context */
-       protected $reservedKeys = [
-               // from monolog:src/Monolog/Formatter/LogstashFormatter.php#L71-L88
-               'message', 'channel', 'level', 'type',
-               // from WebProcessor
-               'url', 'ip', 'http_method', 'server', 'referrer',
-               // from WikiProcessor
-               'host', 'wiki', 'reqId', 'mwversion',
-               // from config magic
-               'normalized_message',
-       ];
 
        /**
         * @param array $record
@@ -47,15 +36,6 @@ class WikiProcessor {
         */
        public function __invoke( array $record ) {
                global $wgVersion;
-
-               // some log aggregators such as Logstash will merge the log context into the main
-               // metadata and end up overwriting the data coming from processors
-               foreach ( $this->reservedKeys as $key ) {
-                       if ( isset( $record['context'][$key] ) ) {
-                               wfLogWarning( __METHOD__ . ": '$key' key overwritten in log context." );
-                       }
-               }
-
                $record['extra'] = array_merge(
                        $record['extra'],
                        [
@@ -67,4 +47,5 @@ class WikiProcessor {
                );
                return $record;
        }
+
 }
diff --git a/tests/phpunit/includes/debug/logger/monolog/LogstashFormatterTest.php b/tests/phpunit/includes/debug/logger/monolog/LogstashFormatterTest.php
new file mode 100644 (file)
index 0000000..8086b4b
--- /dev/null
@@ -0,0 +1,55 @@
+<?php
+
+namespace MediaWiki\Logger\Monolog;
+
+class LogstashFormatterTest extends \PHPUnit_Framework_TestCase {
+       /**
+        * @dataProvider provideV1
+        * @param array $record The input record.
+        * @param array $expected Associative array of expected keys and their values.
+        * @param array $notExpected List of keys that should not exist.
+        */
+       public function testV1( array $record, array $expected, array $notExpected ) {
+               $formatter = new LogstashFormatter( 'app', 'system', null, null, LogstashFormatter::V1 );
+               $formatted = json_decode( $formatter->format( $record ), true );
+               foreach ( $expected as $key => $value ) {
+                       $this->assertArrayHasKey( $key, $formatted );
+                       $this->assertSame( $value, $formatted[$key] );
+               }
+               foreach ( $notExpected as $key ) {
+                       $this->assertArrayNotHasKey( $key, $formatted );
+               }
+       }
+
+       public function provideV1() {
+               return [
+                       [
+                               [ 'extra' => [ 'foo' => 1 ], 'context' => [ 'bar' => 2 ] ],
+                               [ 'foo' => 1, 'bar' => 2 ],
+                               [ 'logstash_formatter_key_conflict' ],
+                       ],
+                       [
+                               [ 'extra' => [ 'url' => 1 ], 'context' => [ 'url' => 2 ] ],
+                               [ 'url' => 1, 'c_url' => 2, 'logstash_formatter_key_conflict' => [ 'url' ] ],
+                               [],
+                       ],
+                       [
+                               [ 'channel' => 'x', 'context' => [ 'channel' => 'y' ] ],
+                               [ 'channel' => 'x', 'c_channel' => 'y',
+                                       'logstash_formatter_key_conflict' => [ 'channel' ] ],
+                               [],
+                       ],
+               ];
+       }
+
+       public function testV1WithPrefix() {
+               $formatter = new LogstashFormatter( 'app', 'system', null, 'ctx_', LogstashFormatter::V1 );
+               $record = [ 'extra' => [ 'url' => 1 ], 'context' => [ 'url' => 2 ] ];
+               $formatted = json_decode( $formatter->format( $record ), true );
+               $this->assertArrayHasKey( 'url', $formatted );
+               $this->assertSame( 1, $formatted['url'] );
+               $this->assertArrayHasKey( 'ctx_url', $formatted );
+               $this->assertSame( 2, $formatted['ctx_url'] );
+               $this->assertArrayNotHasKey( 'c_url', $formatted );
+       }
+}