Merge "Switch Special:FileDuplicateSearch to OOUI"

[lhc/web/wiklou.git] / includes / HtmlFormatter.php
diff --git a/includes/HtmlFormatter.php b/includes/HtmlFormatter.php

index 2d81b28..206f0f7 100644 (file)
--- a/includes/HtmlFormatter.php
+++ b/includes/HtmlFormatter.php
@@ -27,8 +27,8 @@ class HtmlFormatter {
         private $doc;
  
         private $html;
-       private $itemsToRemove = array();
-       private $elementsToFlatten = array();
+       private $itemsToRemove = [];
+       private $elementsToFlatten = [];
         protected $removeMedia = false;
  
         /**
@@ -63,15 +63,9 @@ class HtmlFormatter {
          */
         public function getDoc() {
                 if ( !$this->doc ) {
-                       // DOMDocument::loadHTML apparently isn't very good with encodings, so
+                       // DOMDocument::loadHTML isn't very good with encodings, so
                         // convert input to ASCII by encoding everything at or above 128 as entities.
-                       if ( function_exists( 'mb_convert_encoding' ) ) {
-                               $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
-                       } else {
-                               $html = preg_replace_callback( '/[\x{80}-\x{10ffff}]/u', function ( $m ) {
-                                       return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';';
-                               }, $this->html );
-                       }
+                       $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
  
                         // Workaround for bug that caused spaces before references
                         // to disappear during processing: https://phabricator.wikimedia.org/T55086
@@ -148,7 +142,7 @@ class HtmlFormatter {
                         },
                         true
                 ) ) {
-                       return array();
+                       return [];
                 }
  
                 $doc = $this->getDoc();
@@ -159,7 +153,7 @@ class HtmlFormatter {
                 // over them in a foreach loop. It will seemingly leave the internal
                 // iterator on the foreach out of whack and results will be quite
                 // strange. Though, making a queue of items to remove seems to work.
-               $domElemsToRemove = array();
+               $domElemsToRemove = [];
                 foreach ( $removals['TAG'] as $tagToRemove ) {
                         $tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
                         foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
@@ -171,7 +165,7 @@ class HtmlFormatter {
                 $removed = $this->removeElements( $domElemsToRemove );
  
                 // Elements with named IDs
-               $domElemsToRemove = array();
+               $domElemsToRemove = [];
                 foreach ( $removals['ID'] as $itemToRemove ) {
                         $itemToRemoveNode = $doc->getElementById( $itemToRemove );
                         if ( $itemToRemoveNode ) {
@@ -181,7 +175,7 @@ class HtmlFormatter {
                 $removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
  
                 // CSS Classes
-               $domElemsToRemove = array();
+               $domElemsToRemove = [];
                 $xpath = new DOMXPath( $doc );
                 foreach ( $removals['CLASS'] as $classToRemove ) {
                         $elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' );
@@ -217,7 +211,7 @@ class HtmlFormatter {
         private function removeElements( $elements ) {
                 $list = $elements;
                 if ( $elements instanceof DOMNodeList ) {
-                       $list = array();
+                       $list = [];
                         foreach ( $elements as $element ) {
                                 $list[] = $element;
                         }
@@ -242,22 +236,19 @@ class HtmlFormatter {
                 if ( !$replacements ) {
                         // We don't include rules like '&#34;' => '&amp;quot;' because entities had already been
                         // normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
-                       $replacements = new ReplacementArray( array(
+                       $replacements = new ReplacementArray( [
                                 '&quot;' => '&amp;quot;',
                                 '&amp;' => '&amp;amp;',
                                 '&lt;' => '&amp;lt;',
                                 '&gt;' => '&amp;gt;',
-                       ) );
+                       ] );
                 }
                 $html = $replacements->replace( $html );
  
-               if ( function_exists( 'mb_convert_encoding' ) ) {
-                       // Just in case the conversion in getDoc() above used named
-                       // entities that aren't known to html_entity_decode().
-                       $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
-               } else {
-                       $html = html_entity_decode( $html, ENT_COMPAT, 'utf-8' );
-               }
+               // Just in case the conversion in getDoc() above used named
+               // entities that aren't known to html_entity_decode().
+               $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
+
                 return $html;
         }
  
@@ -279,7 +270,7 @@ class HtmlFormatter {
                         }
                         if ( $element ) {
                                 $body = $this->doc->getElementsByTagName( 'body' )->item( 0 );
-                               $nodesArray = array();
+                               $nodesArray = [];
                                 foreach ( $body->childNodes as $node ) {
                                         $nodesArray[] = $node;
                                 }
@@ -290,7 +281,7 @@ class HtmlFormatter {
                         }
                         $html = $this->doc->saveHTML();
  
-                       $html = $this->fixLibXml( $html );
+                       $html = $this->fixLibXML( $html );
                         if ( wfIsWindows() ) {
                                 // Cleanup for CRLF misprocessing of unknown origin on Windows.
                                 // If this error continues in the future, please track it down in the
@@ -349,12 +340,12 @@ class HtmlFormatter {
          * @return array
          */
         protected function parseItemsToRemove() {
-               $removals = array(
-                       'ID' => array(),
-                       'TAG' => array(),
-                       'CLASS' => array(),
-                       'TAG_CLASS' => array(),
-               );
+               $removals = [
+                       'ID' => [],
+                       'TAG' => [],
+                       'CLASS' => [],
+                       'TAG_CLASS' => [],
+               ];
  
                 foreach ( $this->itemsToRemove as $itemToRemove ) {
                         $type = '';