From eb7e4881ea7287cae89e6082334104c82b0a9ac3 Mon Sep 17 00:00:00 2001 From: Roan Kattouw Date: Fri, 9 Sep 2011 11:28:00 +0000 Subject: [PATCH] Commit live hack: pass XML_PARSE_HUGE (code uses 1 << 19 because the constant isn't available for some reason) into DOMDocument::loadXML() if the first call to loadXML() failed. This prevents newer versions of libxml2 from throwing a warning and messing up when the XML contains structures that are nested more than 256 levels deep. RELEASE-NOTES added to the 1.18 file, tagging this for backporting to 1.18 too. We at Wikimedia never noticed this issue until we upgraded libxml2 on one of our servers as part of an OS upgrade, but apparently the interwebs knew about this since at least May 2010. Hat tip to http://deriksmith.livejournal.com/57617.html , where I found this fix. --- RELEASE-NOTES-1.18 | 2 ++ includes/parser/Preprocessor_DOM.php | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/RELEASE-NOTES-1.18 b/RELEASE-NOTES-1.18 index d0119054d5..19daaa1c12 100644 --- a/RELEASE-NOTES-1.18 +++ b/RELEASE-NOTES-1.18 @@ -444,6 +444,8 @@ production. #REDIRECT [[Foo]] is invalid JS * Tracking categories are no longer shown in footer for special pages * $wgOverrideSiteFeed no longer double escapes urls. +* The preprocessor no longer fails with a PHP warning about XML_PARSE_HUGE when + processing complex pages using newer versions of libxml2. === API changes in 1.18 === * BREAKING CHANGE: action=watch now requires POST and token. diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index dcda8a211f..e6c1d674ab 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -155,7 +155,8 @@ class Preprocessor_DOM implements Preprocessor { if ( !$result ) { // Try running the XML through UtfNormal to get rid of invalid characters $xml = UtfNormal::cleanUp( $xml ); - $result = $dom->loadXML( $xml ); + // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep + $result = $dom->loadXML( $xml, 1 << 19 ); if ( !$result ) { throw new MWException( __METHOD__.' generated invalid XML' ); } -- 2.20.1