}
/**
- * Removes content we've chosen to remove
+ * Removes content we've chosen to remove. The text of the removed elements can be
+ * extracted with the getText method.
+ * @return array of removed DOMElements
*/
public function filterContent() {
wfProfileIn( __METHOD__ );
}
}
}
-
- $this->removeElements( $domElemsToRemove );
+ $removed = $this->removeElements( $domElemsToRemove );
// Elements with named IDs
$domElemsToRemove = array();
$domElemsToRemove[] = $itemToRemoveNode;
}
}
- $this->removeElements( $domElemsToRemove );
+ $removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
// CSS Classes
$domElemsToRemove = array();
}
}
}
- $this->removeElements( $domElemsToRemove );
+ $removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
// Tags with CSS Classes
foreach ( $removals['TAG_CLASS'] as $classToRemove ) {
$elements = $xpath->query(
'//' . $parts[0] . '[@class="' . $parts[1] . '"]'
);
-
- $this->removeElements( $elements );
+ $removed = array_merge( $removed, $this->removeElements( $elements ) );
}
wfProfileOut( __METHOD__ );
+ return $removed;
}
/**
* Removes a list of elements from DOMDocument
* @param array|DOMNodeList $elements
+ * @return array of removed elements
*/
private function removeElements( $elements ) {
$list = $elements;
$element->parentNode->removeChild( $element );
}
}
+ return $list;
}
/**
}
/**
- * Performs final transformations and returns resulting HTML
+ * Performs final transformations and returns resulting HTML. Note that if you want to call this
+ * both without an element and with an element you should call it without an element first. If you
+ * specify the $element in the method it'll change the underlying dom and you won't be able to get
+ * it back.
*
* @param DOMElement|string|null $element Element, or ID of the element, to get HTML from; null to get it from the whole tree
* @return string Processed HTML
* @dataProvider getHtmlData
* @covers HtmlFormatter::getText
*/
- public function testTransform( $input, $expected, $callback = false ) {
+ public function testTransform( $input, $expectedText, $expectedRemoved = array(), $callback = false ) {
$input = self::normalize( $input );
$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
if ( $callback ) {
$callback( $formatter );
}
- $formatter->filterContent();
+ // filterContent() returns the elements it removed so they can be checked below
+ $removedElements = $formatter->filterContent();
+ // Extract the whole-document text first: getText() with an element alters the underlying DOM
$html = $formatter->getText();
+ $removed = array();
+ foreach ( $removedElements as $removedElement ) {
+ $removed[] = self::normalize( $formatter->getText( $removedElement ) );
+ }
+ $expectedRemoved = array_map( 'self::normalize', $expectedRemoved );
$this->assertValidHtmlSnippet( $html );
- $this->assertEquals( self::normalize( $expected ), self::normalize( $html ) );
+ $this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );
+ // Removal order is not part of the contract, so compare order-insensitively.
+ // sort() works in place, reindexes the keys and returns a bool, so the sorted
+ // arrays themselves must be compared rather than the return values of the sort call.
+ sort( $expectedRemoved );
+ sort( $removed );
+ $this->assertEquals( $expectedRemoved, $removed );
}
private static function normalize( $s ) {
array(
'<img src="/foo/bar.jpg" alt="Blah"/>',
'',
+ array( '<img src="/foo/bar.jpg" alt="Blah">' ),
$removeImages,
),
// basic tag removal
'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
-
'<div class="notfoo">test</div>
<span class="baz">baz</span>',
+ array(
+ '<table><tr><td>foo</td></tr></table>',
+ '<div class="foo">foo</div>',
+ '<div class="foo quux">foo</div>',
+ '<span id="bar">bar</span>',
+ '<strong class="foo" id="bar">foobar</strong>',
+ '<div class="baz"/>',
+ ),
$removeTags,
),
// don't flatten tags that start like chosen ones
array(
'<div><s>foo</s> <span>bar</span></div>',
'foo <span>bar</span>',
+ array(),
$flattenSomeStuff,
),
// total flattening
array(
'<div style="foo">bar<sup>2</sup></div>',
'bar2',
+ array(),
$flattenEverything,
),
// UTF-8 preservation and security