Preprocessor_DOM::newPartNodeArray should check that loadXML succeeded
author: Brad Jorsch <bjorsch@wikimedia.org>
Fri, 9 May 2014 20:09:03 +0000 (16:09 -0400)
committer: Tim Starling <tstarling@wikimedia.org>
Mon, 12 May 2014 03:44:23 +0000 (03:44 +0000)
If invalid UTF-8, or anything else that makes the generated XML invalid,
manages to get into Preprocessor_DOM::newPartNodeArray, the method should
handle it in the same way that Preprocessor_DOM::preprocessToObj does,
rather than letting something further down the line blow up on a
PPNode_DOM with a null node.

Bug: 65081
Change-Id: Ic24db455808106e17d49a11e41df33ec170f1206

includes/parser/Preprocessor_DOM.php

index ecdefb7..7d8a0b6 100644 (file)
@@ -80,10 +80,24 @@ class Preprocessor_DOM implements Preprocessor {
 
                $xml .= "</list>";
 
+               wfProfileIn( __METHOD__ . '-loadXML' );
                $dom = new DOMDocument();
-               $dom->loadXML( $xml );
-               $root = $dom->documentElement;
+               wfSuppressWarnings();
+               $result = $dom->loadXML( $xml );
+               wfRestoreWarnings();
+               if ( !$result ) {
+                       // Try running the XML through UtfNormal to get rid of invalid characters
+                       $xml = UtfNormal::cleanUp( $xml );
+                       // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
+                       $result = $dom->loadXML( $xml, 1 << 19 );
+               }
+               wfProfileOut( __METHOD__ . '-loadXML' );
 
+               if ( !$result ) {
+                       throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' );
+               }
+
+               $root = $dom->documentElement;
                $node = new PPNode_DOM( $root->childNodes );
                return $node;
        }