Merge "SpecialUserlogin: Error out when attempting to create a username with a '#'"
[lhc/web/wiklou.git] / includes / HtmlFormatter.php
index a72fd59..987a683 100644 (file)
@@ -128,15 +128,23 @@ class HtmlFormatter {
        }
 
        /**
-        * Removes content we've chosen to remove
+        * Removes content we've chosen to remove.  The text of the removed elements can be
+        * extracted with the getText method.
+        * @return array of removed DOMElements
         */
        public function filterContent() {
                wfProfileIn( __METHOD__ );
                $removals = $this->parseItemsToRemove();
 
-               if ( !$removals ) {
+               // Bail out early if nothing to do
+               if ( array_reduce( $removals,
+                       function( $carry, $item ) {
+                               return $carry && !$item;
+                       },
+                       true
+               ) ) {
                        wfProfileOut( __METHOD__ );
-                       return;
+                       return array();
                }
 
                $doc = $this->getDoc();
@@ -156,8 +164,7 @@ class HtmlFormatter {
                                }
                        }
                }
-
-               $this->removeElements( $domElemsToRemove );
+               $removed = $this->removeElements( $domElemsToRemove );
 
                // Elements with named IDs
                $domElemsToRemove = array();
@@ -167,7 +174,7 @@ class HtmlFormatter {
                                $domElemsToRemove[] = $itemToRemoveNode;
                        }
                }
-               $this->removeElements( $domElemsToRemove );
+               $removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
 
                // CSS Classes
                $domElemsToRemove = array();
@@ -183,7 +190,7 @@ class HtmlFormatter {
                                }
                        }
                }
-               $this->removeElements( $domElemsToRemove );
+               $removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
 
                // Tags with CSS Classes
                foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
@@ -192,16 +199,17 @@ class HtmlFormatter {
                        $elements = $xpath->query(
                                '//' . $parts[0] . '[@class="' . $parts[1] . '"]'
                        );
-
-                       $this->removeElements( $elements );
+                       $removed = array_merge( $removed, $this->removeElements( $elements ) );
                }
 
                wfProfileOut( __METHOD__ );
+               return $removed;
        }
 
        /**
         * Removes a list of elelments from DOMDocument
         * @param array|DOMNodeList $elements
+        * @return array of removed elements
         */
        private function removeElements( $elements ) {
                $list = $elements;
@@ -217,6 +225,7 @@ class HtmlFormatter {
                                $element->parentNode->removeChild( $element );
                        }
                }
+               return $list;
        }
 
        /**
@@ -245,7 +254,10 @@ class HtmlFormatter {
        }
 
        /**
-        * Performs final transformations and returns resulting HTML
+        * Performs final transformations and returns resulting HTML.  Note that if you want to call this
+        * both without an element and with an element you should call it without an element first.  If you
+        * specify the $element in the method it'll change the underlying dom and you won't be able to get
+        * it back.
         *
         * @param DOMElement|string|null $element ID of element to get HTML from or false to get it from the whole tree
         * @return string Processed HTML
@@ -254,6 +266,7 @@ class HtmlFormatter {
                wfProfileIn( __METHOD__ );
 
                if ( $this->doc ) {
+                       wfProfileIn( __METHOD__ . '-dom' );
                        if ( $element !== null && !( $element instanceof DOMElement ) ) {
                                $element = $this->doc->getElementById( $element );
                        }
@@ -269,25 +282,31 @@ class HtmlFormatter {
                                $body->appendChild( $element );
                        }
                        $html = $this->doc->saveHTML();
+                       wfProfileOut( __METHOD__ . '-dom' );
+
+                       wfProfileIn( __METHOD__ . '-fixes' );
                        $html = $this->fixLibXml( $html );
+                       if ( wfIsWindows() ) {
+                               // Cleanup for CRLF misprocessing of unknown origin on Windows.
+                               //
+                               // If this error continues in the future, please track it down in the
+                               // XML code paths if possible and fix there.
+                               $html = str_replace( '
', '', $html );
+                       }
+                       wfProfileOut( __METHOD__ . '-fixes' );
                } else {
                        $html = $this->html;
                }
-               if ( wfIsWindows() ) {
-                       // Appears to be cleanup for CRLF misprocessing of unknown origin
-                       // when running server on Windows platform.
-                       //
-                       // If this error continues in the future, please track it down in the
-                       // XML code paths if possible and fix there.
-                       $html = str_replace( '
', '', $html );
-               }
+               // Remove stuff added by wrapHTML()
                $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
                $html = $this->onHtmlReady( $html );
 
+               wfProfileIn( __METHOD__ . '-flatten' );
                if ( $this->elementsToFlatten ) {
                        $elements = implode( '|', $this->elementsToFlatten );
                        $html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html );
                }
+               wfProfileOut( __METHOD__ . '-flatten' );
 
                wfProfileOut( __METHOD__ );
                return $html;