From ff78abc1a178ead47864d0836a0287dd88c11a04 Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Fri, 9 May 2014 16:09:03 -0400 Subject: [PATCH] Preprocessor_DOM::newPartNodeArray should check that loadXML succeeded If something manages to get invalid UTF-8 into Preprocessor_DOM::newPartNodeArray, or anything else that somehow is invalid XML, it should handle it in the same way that Preprocessor_DOM::preprocessToObj does rather than having something further down the line blow up on a PPNode_DOM with a null node. Bug: 65081 Change-Id: Ic24db455808106e17d49a11e41df33ec170f1206 --- includes/parser/Preprocessor_DOM.php | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index ecdefb7bdb..7d8a0b6818 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -80,10 +80,24 @@ class Preprocessor_DOM implements Preprocessor { $xml .= ""; + wfProfileIn( __METHOD__ . '-loadXML' ); $dom = new DOMDocument(); - $dom->loadXML( $xml ); - $root = $dom->documentElement; + wfSuppressWarnings(); + $result = $dom->loadXML( $xml ); + wfRestoreWarnings(); + if ( !$result ) { + // Try running the XML through UtfNormal to get rid of invalid characters + $xml = UtfNormal::cleanUp( $xml ); + // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep + $result = $dom->loadXML( $xml, 1 << 19 ); + } + wfProfileOut( __METHOD__ . '-loadXML' ); + if ( !$result ) { + throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' ); + } + + $root = $dom->documentElement; $node = new PPNode_DOM( $root->childNodes ); return $node; } -- 2.20.1