private $doc;
private $html;
- private $itemsToRemove = array();
- private $elementsToFlatten = array();
+ private $itemsToRemove = [];
+ private $elementsToFlatten = [];
protected $removeMedia = false;
/**
}
// Workaround for bug that caused spaces before references
- // to disappear during processing:
- // https://bugzilla.wikimedia.org/show_bug.cgi?id=53086
- //
- // Please replace with a better fix if one can be found.
+ // to disappear during processing: https://phabricator.wikimedia.org/T55086
+ // TODO: Please replace with a better fix if one can be found.
$html = str_replace( ' <', ' <', $html );
libxml_use_internal_errors( true );
},
true
) ) {
- return array();
+ return [];
}
$doc = $this->getDoc();
// over them in a foreach loop. It will seemingly leave the internal
// iterator on the foreach out of whack and results will be quite
// strange. Though, making a queue of items to remove seems to work.
- $domElemsToRemove = array();
+ $domElemsToRemove = [];
foreach ( $removals['TAG'] as $tagToRemove ) {
$tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove );
foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
$removed = $this->removeElements( $domElemsToRemove );
// Elements with named IDs
- $domElemsToRemove = array();
+ $domElemsToRemove = [];
foreach ( $removals['ID'] as $itemToRemove ) {
$itemToRemoveNode = $doc->getElementById( $itemToRemove );
if ( $itemToRemoveNode ) {
$removed = array_merge( $removed, $this->removeElements( $domElemsToRemove ) );
// CSS Classes
- $domElemsToRemove = array();
+ $domElemsToRemove = [];
$xpath = new DOMXPath( $doc );
foreach ( $removals['CLASS'] as $classToRemove ) {
$elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' );
private function removeElements( $elements ) {
$list = $elements;
if ( $elements instanceof DOMNodeList ) {
- $list = array();
+ $list = [];
foreach ( $elements as $element ) {
$list[] = $element;
}
if ( !$replacements ) {
// We don't include rules like '&#34;' => '&amp;quot;' because entities had already been
// normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE!
- $replacements = new ReplacementArray( array(
+ $replacements = new ReplacementArray( [
'"' => '&quot;',
'&' => '&amp;',
'<' => '&lt;',
'>' => '&gt;',
- ) );
+ ] );
}
$html = $replacements->replace( $html );
}
if ( $element ) {
$body = $this->doc->getElementsByTagName( 'body' )->item( 0 );
- $nodesArray = array();
+ $nodesArray = [];
foreach ( $body->childNodes as $node ) {
$nodesArray[] = $node;
}
$html = $this->fixLibXml( $html );
if ( wfIsWindows() ) {
// Cleanup for CRLF misprocessing of unknown origin on Windows.
- //
// If this error continues in the future, please track it down in the
// XML code paths if possible and fix there.
$html = str_replace( ' ', '', $html );
* @return array
*/
protected function parseItemsToRemove() {
- $removals = array(
- 'ID' => array(),
- 'TAG' => array(),
- 'CLASS' => array(),
- 'TAG_CLASS' => array(),
- );
+ $removals = [
+ 'ID' => [],
+ 'TAG' => [],
+ 'CLASS' => [],
+ 'TAG_CLASS' => [],
+ ];
foreach ( $this->itemsToRemove as $itemToRemove ) {
$type = '';