From: Aryeh Gregor Date: Fri, 4 Dec 2009 19:39:15 +0000 (+0000) Subject: Add DTD to fix well-formedness errors in HTML5 X-Git-Tag: 1.31.0-rc.0~38622 X-Git-Url: http://git.cyclocoop.org//%27%40script%40/%27?a=commitdiff_plain;h=36f4e6822cafa129078263bdbf67486cb9e8bed1;p=lhc%2Fweb%2Fwiklou.git Add DTD to fix well-formedness errors in HTML5 Now actually tested, using Python's SAX module. You can verify that a page is well-formed XML (or at least won't break in pywikipediabot) with a program like this: import xml.sax class Myhandler(xml.sax.ContentHandler): pass h = Myhandler() xml.sax.parse("http://localhost/git-trunk/phase3/index.php?title=Special:UserLogin", h) If the page is not well-formed, this will throw an exception. It did with the old doctype, but no longer does if $wgWellFormedXml == true. --- diff --git a/includes/OutputPage.php b/includes/OutputPage.php index 86220002bc..3be8a74e0d 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -1567,7 +1567,7 @@ class OutputPage { public function headElement( Skin $sk, $includeStyle = true ) { global $wgDocType, $wgDTD, $wgContLanguageCode, $wgOutputEncoding, $wgMimeType; global $wgXhtmlDefaultNamespace, $wgXhtmlNamespaces, $wgHtml5Version; - global $wgContLang, $wgUseTrackbacks, $wgStyleVersion, $wgHtml5; + global $wgContLang, $wgUseTrackbacks, $wgStyleVersion, $wgHtml5, $wgWellFormedXml; $this->addMeta( "http:Content-Type", "$wgMimeType; charset={$wgOutputEncoding}" ); if ( $sk->commonPrintStylesheet() ) { @@ -1588,9 +1588,21 @@ class OutputPage { $dir = $wgContLang->getDir(); if ( $wgHtml5 ) { - $ret .= "\n"; + if ( $wgWellFormedXml ) { + # Unknown elements and attributes are okay in XML, but unknown + # named entities are well-formedness errors and will break XML + # parsers. Thus we need a doctype that gives us appropriate + # entity definitions. 
The HTML5 spec permits four legacy + # doctypes as obsolete but conforming, so let's pick one of + # those, although it makes our pages look like XHTML1 Strict. + # Isn't compatibility great? + $ret .= "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"; + } else { + # Much saner. + $ret .= "<!doctype html>\n"; + } $ret .= "\n";