SECURITY: Don't allow entities in XMP with HHVM
authorcsteipp <csteipp@wikimedia.org>
Thu, 12 Mar 2015 22:49:22 +0000 (15:49 -0700)
committercsteipp <csteipp@wikimedia.org>
Wed, 1 Apr 2015 16:56:43 +0000 (09:56 -0700)
Test for, and refuse to parse, XMP chunks with a doctype declaration
when parsing XMP under HHVM.

Bug: T85848
Change-Id: Iea4feb077ee85a35509a920153daaa9321ee69f3

includes/media/BitmapMetadataHandler.php
includes/media/JpegMetadataExtractor.php
includes/media/XMP.php
tests/phpunit/data/xmp/doctype-included.result.php [new file with mode: 0644]
tests/phpunit/data/xmp/doctype-included.xmp [new file with mode: 0644]
tests/phpunit/data/xmp/doctype-not-included.xmp [new file with mode: 0644]
tests/phpunit/includes/media/XMPTest.php

index bb7a1e8..c8d37bb 100644 (file)
@@ -154,7 +154,7 @@ class BitmapMetadataHandler {
         * @throws MWException On invalid file.
         */
        static function Jpeg( $filename ) {
-               $showXMP = function_exists( 'xml_parser_create_ns' );
+               $showXMP = XMPReader::isSupported();
                $meta = new self();
 
                $seg = JpegMetadataExtractor::segmentSplitter( $filename );
@@ -196,7 +196,7 @@ class BitmapMetadataHandler {
         * @return array Array for storage in img_metadata.
         */
        public static function PNG( $filename ) {
-               $showXMP = function_exists( 'xml_parser_create_ns' );
+               $showXMP = XMPReader::isSupported();
 
                $meta = new self();
                $array = PNGMetadataExtractor::getMetadata( $filename );
@@ -236,7 +236,7 @@ class BitmapMetadataHandler {
                        $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' );
                }
 
-               if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) {
+               if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) {
                        $xmp = new XMPReader();
                        $xmp->parse( $baseArray['xmp'] );
                        $xmpRes = $xmp->getResults();
index 0d8013d..ae4af8d 100644 (file)
@@ -48,7 +48,7 @@ class JpegMetadataExtractor {
         * @throws MWException If given invalid file.
         */
        static function segmentSplitter( $filename ) {
-               $showXMP = function_exists( 'xml_parser_create_ns' );
+               $showXMP = XMPReader::isSupported();
 
                $segmentCount = 0;
 
index 0d341aa..50f04ae 100644 (file)
@@ -80,6 +80,12 @@ class XMPReader {
        /** @var int */
        private $extendedXMPOffset = 0;
 
+       /** @var int Flag determining if the XMP is safe to parse **/
+       private $parsable = 0;
+
+       /** @var string Buffer of XML to parse **/
+       private $xmlParsableBuffer = '';
+
        /**
         * These are various mode constants.
         * they are used to figure out what to do
@@ -108,6 +114,12 @@ class XMPReader {
        const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
        const NS_XML = 'http://www.w3.org/XML/1998/namespace';
 
+       // States used while determining if XML is safe to parse
+       const PARSABLE_UNKNOWN = 0;
+       const PARSABLE_OK = 1;
+       const PARSABLE_BUFFERING = 2;
+       const PARSABLE_NO = 3;
+
        /**
         * Constructor.
         *
@@ -145,6 +157,9 @@ class XMPReader {
                        array( $this, 'endElement' ) );
 
                xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) );
+
+               $this->parsable = self::PARSABLE_UNKNOWN;
+               $this->xmlParsableBuffer = '';
        }
 
        /** Destroy the xml parser
@@ -156,6 +171,13 @@ class XMPReader {
                xml_parser_free( $this->xmlParser );
        }
 
+       /**
+        * Check if this instance supports using this class
+        */
+       public static function isSupported() {
+               return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' );
+       }
+
        /** Get the result array. Do some post-processing before returning
         * the array, and transform any metadata that is special-cased.
         *
@@ -305,6 +327,27 @@ class XMPReader {
                                wfRestoreWarnings();
                        }
 
+                       // Ensure the XMP block does not have an xml doctype declaration, which
+                       // could declare entities unsafe to parse with xml_parse (T85848/T71210).
+                       if ( $this->parsable !== self::PARSABLE_OK ) {
+                               if ( $this->parsable === self::PARSABLE_NO ) {
+                                       throw new Exception( 'Unsafe doctype declaration in XML.' );
+                               }
+
+                               $content = $this->xmlParsableBuffer . $content;
+                               if ( !$this->checkParseSafety( $content ) ) {
+                                       if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) {
+                                               // parse wasn't Unsuccessful yet, so return true
+                                               // in this case.
+                                               return true;
+                                       }
+                                       $msg = ( $this->parsable === self::PARSABLE_NO ) ?
+                                               'Unsafe doctype declaration in XML.' :
+                                               'No root element found in XML.';
+                                       throw new Exception( $msg );
+                               }
+                       }
+
                        $ok = xml_parse( $this->xmlParser, $content, $allOfIt );
                        if ( !$ok ) {
                                $error = xml_error_string( xml_get_error_code( $this->xmlParser ) );
@@ -437,6 +480,62 @@ class XMPReader {
                }
        }
 
+       /**
+        * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't
+        * contain a doctype declaration which could contain a dos attack if we
+        * parse it and expand internal entities (T85848).
+        *
+        * @param string $content xml string to check for parse safety
+        * @return bool true if the xml is safe to parse, false otherwise
+        */
+       private function checkParseSafety( $content ) {
+               $reader = new XMLReader();
+               $result = null;
+
+               // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
+               // instead of using XML().
+               $reader->open(
+                       'data://text/plain,' . urlencode( $content ),
+                       null,
+                       LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET
+               );
+
+               $oldDisable = libxml_disable_entity_loader( true );
+               $reset = new ScopedCallback(
+                       'libxml_disable_entity_loader',
+                       array( $oldDisable )
+               );
+               $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false );
+
+               // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning
+               // when parsing truncated XML, which causes unit tests to fail.
+               wfSuppressWarnings();
+               while ( $reader->read() ) {
+                       if ( $reader->nodeType === XMLReader::ELEMENT ) {
+                               // Reached the first element without hitting a doctype declaration
+                               $this->parsable = self::PARSABLE_OK;
+                               $result = true;
+                               break;
+                       }
+                       if ( $reader->nodeType === XMLReader::DOC_TYPE ) {
+                               $this->parsable = self::PARSABLE_NO;
+                               $result = false;
+                               break;
+                       }
+               }
+               wfRestoreWarnings();
+
+               if ( !is_null( $result ) ) {
+                       return $result;
+               }
+
+               // Reached the end of the parsable xml without finding an element
+               // or doctype. Buffer and try again.
+               $this->parsable = self::PARSABLE_BUFFERING;
+               $this->xmlParsableBuffer = $content;
+               return false;
+       }
+
        /** When we hit a closing element in MODE_IGNORE
         * Check to see if this is the element we started to ignore,
         * in which case we get out of MODE_IGNORE
diff --git a/tests/phpunit/data/xmp/doctype-included.result.php b/tests/phpunit/data/xmp/doctype-included.result.php
new file mode 100644 (file)
index 0000000..9a9cc36
--- /dev/null
@@ -0,0 +1,3 @@
+<?php
+
+$result = array();
diff --git a/tests/phpunit/data/xmp/doctype-included.xmp b/tests/phpunit/data/xmp/doctype-included.xmp
new file mode 100644 (file)
index 0000000..8c94675
--- /dev/null
@@ -0,0 +1,12 @@
+<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?> <!DOCTYPE x:xmpmeta [ <!ENTITY lol "lollollollollollollollollollollol"> ]>
+<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core
+ 4.1.3-c001 49.282696, Mon Apr 02 2007 21:16:10        ">
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+<rdf:Description
+ rdf:about=""
+ xmlns:exif="http://ns.adobe.com/exif/1.0/"
+ exif:DigitalZoomRatio="0/10">
+<exif:Flash rdf:parseType='Resource'>
+<exif:Fired>True</exif:Fired> <exif:Return>0</exif:Return> <exif:Mode>1</exif:Mode> <exif:Function>False</exif:Function> <exif:RedEyeMode>False</exif:RedEyeMode></exif:Flash> </rdf:Description> </rdf:RDF> </x:xmpmeta>
+
+<?xpacket end="w"?>
diff --git a/tests/phpunit/data/xmp/doctype-not-included.xmp b/tests/phpunit/data/xmp/doctype-not-included.xmp
new file mode 100644 (file)
index 0000000..9a40b4b
--- /dev/null
@@ -0,0 +1,11 @@
+<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?> <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core
+ 4.1.3-c001 49.282696, Mon Apr 02 2007 21:16:10        ">
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+<rdf:Description
+ rdf:about=""
+ xmlns:exif="http://ns.adobe.com/exif/1.0/"
+ exif:DigitalZoomRatio="0/10">
+<exif:Flash rdf:parseType='Resource'>
+<exif:Fired>True</exif:Fired> <exif:Return>0</exif:Return> <exif:Mode>1</exif:Mode> <exif:Function>False</exif:Function> <exif:RedEyeMode>False</exif:RedEyeMode></exif:Flash> </rdf:Description> </rdf:RDF> </x:xmpmeta>
+
+<?xpacket end="w"?>
index 798e492..6758e94 100644 (file)
@@ -62,6 +62,8 @@ class XMPTest extends MediaWikiTestCase {
                        array( 'gps', 'Handling of exif GPS parameters in XMP' ),
                );
 
+               $xmpFiles[] = array( 'doctype-included', 'XMP includes doctype' );
+
                foreach ( $xmpFiles as $file ) {
                        $xmp = file_get_contents( $xmpPath . $file[0] . '.xmp' );
                        // I'm not sure if this is the best way to handle getting the
@@ -170,4 +172,52 @@ class XMPTest extends MediaWikiTestCase {
 
                $this->assertEquals( $expected, $actual );
        }
+
+       /**
+        * Test for multi-section, hostile XML
+        * @covers checkParseSafety
+        */
+       public function testCheckParseSafety() {
+
+               // Test for detection
+               $xmpPath = __DIR__ . '/../../data/xmp/';
+               $file = fopen( $xmpPath . 'doctype-included.xmp', 'rb' );
+               $valid = false;
+               $reader = new XMPReader();
+               do {
+                       $chunk = fread( $file, 10 );
+                       $valid = $reader->parse( $chunk, feof( $file ) );
+               } while ( !feof( $file ) );
+               $this->assertFalse( $valid, 'Check that doctype is detected in fragmented XML' );
+               $this->assertEquals(
+                       array(),
+                       $reader->getResults(),
+                       'Check that doctype is detected in fragmented XML'
+               );
+               fclose( $file );
+               unset( $reader );
+
+               // Test for false positives
+               $file = fopen( $xmpPath . 'doctype-not-included.xmp', 'rb' );
+               $valid = false;
+               $reader = new XMPReader();
+               do {
+                       $chunk = fread( $file, 10 );
+                       $valid = $reader->parse( $chunk, feof( $file ) );
+               } while ( !feof( $file ) );
+               $this->assertTrue(
+                       $valid,
+                       'Check for false-positive detecting doctype in fragmented XML'
+               );
+               $this->assertEquals(
+                       array(
+                               'xmp-exif' => array(
+                                       'DigitalZoomRatio' => '0/10',
+                                       'Flash' => '9'
+                               )
+                       ),
+                       $reader->getResults(),
+                       'Check that doctype is detected in fragmented XML'
+               );
+       }
 }