ApiCSPReport: Support origin/path matching for false positives
author: Timo Tijhof <krinklemail@gmail.com>
Fri, 21 Dec 2018 20:56:57 +0000 (12:56 -0800)
committer: Krinkle <krinklemail@gmail.com>
Tue, 16 Apr 2019 23:51:56 +0000 (23:51 +0000)
According to https://www.tollmanz.com/content-security-policy-report-samples/,
browsers are meant to normalise blocked-uri to just the origin,
similar to referer.

However, not all browsers do this in practice, and even in Chrome
it only applies if CORS is not authorising the origin to see the
full url, which means it is usually still the full url for things
like CORS API requests to things under wmflabs.org.

The purpose of this change is to allow a wmflabs.org subdomain
and certain subdirectories to be set as false positive and have
it not log to Logstash in wmf-production.

Bug: T207900
Change-Id: I21f93223e0e3a6ca2dbbb95163a02cd88e4dfc8f

includes/api/ApiCSPReport.php

index 6271128..1584164 100644 (file)
@@ -104,11 +104,11 @@ class ApiCSPReport extends ApiBase {
                        ) ||
                        (
                                isset( $report['blocked-uri'] ) &&
-                               isset( $falsePositives[$report['blocked-uri']] )
+                               $this->matchUrlPattern( $report['blocked-uri'], $falsePositives )
                        ) ||
                        (
                                isset( $report['source-file'] ) &&
-                               isset( $falsePositives[$report['source-file']] )
+                               $this->matchUrlPattern( $report['source-file'], $falsePositives )
                        )
                ) {
                        // False positive due to:
@@ -119,6 +119,39 @@ class ApiCSPReport extends ApiBase {
                return $flags;
        }
 
+       /**
+        * Check whether a URL is covered by one of the given patterns.
+        *
+        * A URL is covered when it equals a pattern key, when its origin
+        * (the URL with user, password, path, query and fragment removed)
+        * equals a pattern key, or when it starts with a pattern key that
+        * ends in a slash.
+        *
+        * @param string $url
+        * @param array $patterns Map whose keys are the patterns; only key
+        *  presence is checked via isset(), so keys with null values never match
+        * @return bool
+        */
+       private function matchUrlPattern( $url, array $patterns ) {
+               if ( isset( $patterns[ $url ] ) ) {
+                       return true;
+               }
+
+               // wfParseUrl() is documented to return false when it cannot parse its
+               // input (presumably the case for non-URL report values such as keywords).
+               // Normalise that to an empty array so the offset operations below never
+               // run against a boolean, which is deprecated as of PHP 8.1.
+               $bits = wfParseUrl( $url ) ?: [];
+               unset( $bits['user'], $bits['pass'], $bits['query'], $bits['fragment'] );
+               $bits['path'] = '';
+               $serverUrl = wfAssembleUrl( $bits );
+               if ( isset( $patterns[$serverUrl] ) ) {
+                       // The origin of the url matches a pattern,
+                       // e.g. "https://example.org" matches "https://example.org/foo/b?a#r"
+                       return true;
+               }
+               foreach ( $patterns as $pattern => $val ) {
+                       // We only use this pattern if it ends in a slash, this prevents
+                       // "/foos" from matching "/foo", and "https://good.combo.bad" matching
+                       // "https://good.com".
+                       if ( substr( $pattern, -1 ) === '/' && strpos( $url, $pattern ) === 0 ) {
+                               // The url starts with the pattern,
+                               // e.g. "https://example.org/foo/" matches "https://example.org/foo/b?a#r"
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
        /**
         * Output an api error if post body is obviously not OK.
         */