cb(), $subject, $flags ); }

	/**
	 * More or less "markup-safe" explode()
	 * Ignores any instances of the separator inside <...>
	 *
	 * @param string $separator
	 * @param string $text
	 * @return array
	 */
	static function explodeMarkup( $separator, $text ) {
		// A NUL byte stands in for separators that must survive the split
		$marker = "\x00";

		// Drop marker bytes already present so they cannot be mistaken for ours
		$text = str_replace( $marker, '', $text );

		// Inside every <...> span, swap the separator for the marker
		$hider = new DoubleReplacer( $separator, $marker );
		$masked = StringUtils::delimiterReplaceCallback( '<', '>', $hider->cb(), $text );

		// Split on the separators that are left, then restore the hidden
		// ones in each resulting piece
		$pieces = explode( $separator, $masked );
		foreach ( $pieces as &$piece ) {
			$piece = str_replace( $marker, $separator, $piece );
		}
		unset( $piece );

		return $pieces;
	}

	/**
	 * Escape a string to make it suitable for inclusion in a preg_replace()
	 * replacement parameter.
	 *
	 * @param string $string
	 * @return string
	 */
	static function escapeRegexReplacement( $string ) {
		// The pairs are applied in order: backslashes are doubled first, so the
		// backslash placed in front of each "$" afterwards is not doubled again
		return str_replace( array( '\\', '$' ), array( '\\\\', '\\$' ), $string );
	}

	/**
	 * Workalike for explode() with limited memory usage.
	 * Returns an Iterator: an ExplodeIterator when the separator occurs more
	 * than 1000 times in the subject, otherwise an ArrayIterator wrapping the
	 * result of a plain explode().
	 */
	static function explode( $separator, $subject ) {
		$manyPieces = substr_count( $subject, $separator ) > 1000;
		if ( !$manyPieces ) {
			return new ArrayIterator( explode( $separator, $subject ) );
		}
		return new ExplodeIterator( $separator, $subject );
	}

	/**
	 * Workalike for preg_split() with limited memory usage.
* Returns an Iterator
	 */
	static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
		return new PregSplitIterator( $pattern, $subject, $limit, $flags );
	}
}

/**
 * Base class for "replacers", objects used in preg_replace_callback() and
 * StringUtils::delimiterReplaceCallback()
 */
class Replacer {
	/**
	 * Get a callback that invokes replace() on this object.
	 *
	 * @return array Callback in ( object, method name ) form
	 */
	function cb() {
		// No reference operator: the array holds the object handle either way
		return array( $this, 'replace' );
	}
}

/**
 * Class to replace regex matches with a string similar to that used in preg_replace()
 */
class RegexlikeReplacer extends Replacer {
	// The replacement template, containing $0, $1, ... placeholders
	var $r;

	function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * @param array $matches Match array as passed to a preg_replace_callback() callback
	 * @return string The template with each "$<key>" replaced by the match stored under that key
	 */
	function replace( $matches ) {
		$pairs = array();
		foreach ( $matches as $i => $match ) {
			$pairs["\$$i"] = $match;
		}
		return strtr( $this->r, $pairs );
	}
}

/**
 * Class to perform secondary replacement within each replacement string
 */
class DoubleReplacer extends Replacer {
	// Search string, its replacement, and the key of the match to operate on
	var $from, $to, $index;

	/**
	 * @param string $from String to search for inside the match
	 * @param string $to String to put in its place
	 * @param int $index Key of the element of the match array to operate on
	 */
	function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * @param array $matches
	 * @return string The selected match with every $from replaced by $to
	 */
	function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}

/**
 * Class to perform replacement based on a simple hashtable lookup
 */
class HashtableReplacer extends Replacer {
	var $table, $index;

	/**
	 * @param array $table Map from matched text to replacement
	 * @param int $index Key of the element of the match array used for the lookup
	 */
	function __construct( $table, $index = 0 ) {
		$this->table = $table;
		$this->index = $index;
	}

	/**
	 * @param array $matches
	 * @return mixed The table entry stored under the selected match
	 */
	function replace( $matches ) {
		return $this->table[$matches[$this->index]];
	}
}

/**
 * Replacement array for FSS with fallback to strtr()
 * Supports lazy initialisation of FSS resource
 */
class ReplacementArray {
	/*mostly private*/ var $data = false;
	/*mostly private*/ var $fss = false;

	/**
	 * Create an object with the specified replacement array
	 * The array should have the same form as the replacement array for strtr()
	 */
	function __construct( $data = array() ) {
		$this->data = $data;
	}

	/**
	 * Only the replacement pairs are serialized; the FSS handle is rebuilt on demand.
	 */
	function __sleep() {
		return array( 'data' );
	}

	function __wakeup() {
		$this->fss = false;
	}

	/**
	 * Set the whole replacement array at once
	 */
	function setArray( $data ) {
		$this->data = $data;
		$this->fss = false;
	}

	/**
	 * @return array The current replacement pairs
	 */
	function getArray() {
		return $this->data;
	}

	/**
	 * Set an element of the replacement array
	 */
	function setPair( $from, $to ) {
		$this->data[$from] = $to;
		$this->fss = false;
	}

	/**
	 * Add a set of pairs; a pair whose key already exists overwrites the old one.
	 *
	 * @param array $data Pairs in the same form as for strtr()
	 */
	function mergeArray( $data ) {
		// Assign pair by pair instead of using array_merge(): array_merge()
		// renumbers integer-like keys such as '1', which would change the
		// strings being searched for.
		foreach ( $data as $from => $to ) {
			$this->data[$from] = $to;
		}
		$this->fss = false;
	}

	/**
	 * Add all pairs of another ReplacementArray; its pairs win on key conflicts.
	 *
	 * @param ReplacementArray $other
	 */
	function merge( $other ) {
		// Same key-preserving merge as mergeArray()
		foreach ( $other->data as $from => $to ) {
			$this->data[$from] = $to;
		}
		$this->fss = false;
	}

	/**
	 * Remove the pair with the given search string, if present
	 */
	function removePair( $from ) {
		unset( $this->data[$from] );
		$this->fss = false;
	}

	/**
	 * Remove every pair whose search string is a key of $data
	 */
	function removeArray( $data ) {
		foreach ( $data as $from => $to ) {
			$this->removePair( $from );
		}
		$this->fss = false;
	}

	/**
	 * Apply the replacement pairs to a string.
	 *
	 * @param string $subject
	 * @return string
	 */
	function replace( $subject ) {
		if ( function_exists( 'fss_prep_replace' ) ) {
			wfProfileIn( __METHOD__.'-fss' );
			// Build the FSS handle lazily; every mutator resets it to false
			if ( $this->fss === false ) {
				$this->fss = fss_prep_replace( $this->data );
			}
			$result = fss_exec_replace( $this->fss, $subject );
			wfProfileOut( __METHOD__.'-fss' );
		} else {
			wfProfileIn( __METHOD__.'-strtr' );
			$result = strtr( $subject, $this->data );
			wfProfileOut( __METHOD__.'-strtr' );
		}
		return $result;
	}
}

/**
 * An iterator which works exactly like:
 *
 * foreach ( explode( $delim, $s ) as $element ) {
 *     ...
* }
 *
 * Except it doesn't use 193 bytes per element
 *
 * Keys are the byte offsets of the tokens in the subject, not 0, 1, 2, ...
 * The #[\ReturnTypeWillChange] markers silence the PHP 8.1+ return type
 * deprecation for Iterator methods; older PHP versions read them as comments.
 */
class ExplodeIterator implements Iterator {
	// The subject string and its length in bytes
	var $subject, $subjectLength;

	// The delimiter and its length in bytes
	var $delim, $delimLength;

	// The position of the start of the current token, or false once iteration is over
	var $curPos;

	// The position of the start of the next delimiter, or false if there is none left
	var $endPos;

	// The current token
	var $current;

	/**
	 * Construct an ExplodeIterator
	 *
	 * @param string $delim The delimiter
	 * @param string $s The subject string
	 */
	function __construct( $delim, $s ) {
		$this->subject = $s;
		$this->delim = $delim;

		// Micro-optimisation (theoretical)
		$this->subjectLength = strlen( $s );
		$this->delimLength = strlen( $delim );

		$this->rewind();
	}

	#[\ReturnTypeWillChange]
	function rewind() {
		$this->curPos = 0;
		$this->endPos = strpos( $this->subject, $this->delim );
		$this->refreshCurrent();
	}

	/**
	 * Recompute $current from $curPos and $endPos
	 */
	function refreshCurrent() {
		if ( $this->curPos === false ) {
			// Iteration is over
			$this->current = false;
		} elseif ( $this->curPos >= $this->subjectLength ) {
			// Empty subject, or the subject ended with a delimiter: like
			// explode(), yield one final empty token
			$this->current = '';
		} elseif ( $this->endPos === false ) {
			// No further delimiter: the token runs to the end of the subject
			$this->current = substr( $this->subject, $this->curPos );
		} else {
			$this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
		}
	}

	#[\ReturnTypeWillChange]
	function current() {
		return $this->current;
	}

	#[\ReturnTypeWillChange]
	function key() {
		return $this->curPos;
	}

	/**
	 * Advance to the next token
	 *
	 * @return string|false The new current token, or false when there is none
	 */
	#[\ReturnTypeWillChange]
	function next() {
		if ( $this->endPos === false ) {
			// The token just consumed was the last one
			$this->curPos = false;
		} else {
			$this->curPos = $this->endPos + $this->delimLength;
			if ( $this->curPos >= $this->subjectLength ) {
				$this->endPos = false;
			} else {
				$this->endPos = strpos( $this->subject, $this->delim, $this->curPos );
			}
		}
		$this->refreshCurrent();
		return $this->current;
	}

	#[\ReturnTypeWillChange]
	function valid() {
		return $this->curPos !== false;
	}
}

/**
 * An iterator which works exactly like:
 *
 * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
 *    ...
 * }
 *
 * Except it doesn't use huge amounts of memory when $limit is -1
 *
 * The subject is split in chunks of at most MAX_LIMIT pieces; the unsplit
 * remainder is kept aside and split again when the chunk is exhausted. Each
 * remainder is matched on its own, so patterns that look behind the start of
 * a piece may behave differently from a single preg_split() call.
 *
 * With PREG_SPLIT_DELIM_CAPTURE or PREG_SPLIT_OFFSET_CAPTURE the number and
 * shape of the returned elements no longer allow the remainder to be
 * identified, so the subject is split in one preg_split() call instead
 * (correct results, but without the memory saving).
 *
 * The #[\ReturnTypeWillChange] markers silence the PHP 8.1+ return type
 * deprecation for Iterator methods; older PHP versions read them as comments.
 */
class PregSplitIterator implements Iterator {
	// The arguments as they would be passed to preg_split()
	var $pattern, $subject, $originalLimit, $flags;

	// The last extracted group of items (without the unsplit remainder)
	var $smallArray;

	// The position on the iterator (zero-based index of the current element)
	var $curPos;

	// For a positive limit: the number of pieces later chunks may still produce
	var $limit;

	// Index of the current element within $smallArray
	private $chunkPos = 0;

	// The part of the subject not split yet, or null when nothing is left
	private $rest = null;

	const MAX_LIMIT = 100;

	/**
	 * Construct a PregSplitIterator
	 *
	 * @param string $pattern
	 * @param string $s The subject string
	 * @param int $limit As for preg_split(); -1 or 0 means no limit
	 * @param int $flags As for preg_split()
	 */
	function __construct( $pattern, $s, $limit, $flags) {
		$this->pattern = $pattern;
		$this->subject = $s;
		$this->originalLimit = $limit;
		$this->flags = $flags;
		$this->rewind();
	}

	/**
	 * Whether the caller asked for no limit. preg_split() treats 0 like -1.
	 */
	private function isUnlimited() {
		return $this->originalLimit === null
			|| $this->originalLimit == -1
			|| $this->originalLimit == 0;
	}

	/**
	 * Whether the subject can be split chunk by chunk (see the class comment)
	 */
	private function canChunk() {
		return ( $this->flags & ( PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE ) ) == 0;
	}

	/**
	 * Get the limit for the next preg_split() call and update the remaining
	 * budget. MAX_LIMIT + 1 asks for MAX_LIMIT pieces plus the remainder.
	 */
	private function effectiveLimit() {
		if ( $this->isUnlimited() ) {
			return self::MAX_LIMIT + 1;
		}
		if ( $this->limit > self::MAX_LIMIT ) {
			$this->limit -= self::MAX_LIMIT;
			return self::MAX_LIMIT + 1;
		}
		// Final chunk: spend whatever is left of the caller's limit
		$last = $this->limit;
		$this->limit = 0;
		return $last;
	}

	/**
	 * Split the pending remainder into $smallArray, keeping a new remainder
	 * aside if the chunk was filled completely.
	 */
	private function loadChunk() {
		$input = $this->rest;
		$this->rest = null;
		$this->chunkPos = 0;

		if ( $this->canChunk() ) {
			$pieces = preg_split( $this->pattern, $input, $this->effectiveLimit(), $this->flags );
			// More than MAX_LIMIT elements can only come from a limit of
			// MAX_LIMIT + 1, in which case the last element is the remainder
			if ( is_array( $pieces ) && count( $pieces ) > self::MAX_LIMIT ) {
				$this->rest = array_pop( $pieces );
			}
		} else {
			$limit = $this->isUnlimited() ? -1 : $this->originalLimit;
			$pieces = preg_split( $this->pattern, $input, $limit, $this->flags );
		}

		// preg_split() returns false on failure; iterate over nothing then
		$this->smallArray = is_array( $pieces ) ? $pieces : array();
	}

	#[\ReturnTypeWillChange]
	function rewind() {
		$this->curPos = 0;
		$this->limit = $this->originalLimit;
		$this->rest = $this->subject;
		$this->loadChunk();
	}

	#[\ReturnTypeWillChange]
	function current() {
		return isset( $this->smallArray[$this->chunkPos] )
			? $this->smallArray[$this->chunkPos]
			: null;
	}

	#[\ReturnTypeWillChange]
	function key() {
		return $this->curPos;
	}

	#[\ReturnTypeWillChange]
	function next() {
		$this->curPos++;
		$this->chunkPos++;
		// Chunk exhausted: split the remainder, if any. With
		// PREG_SPLIT_NO_EMPTY a remainder may yield no piece at all.
		while ( $this->chunkPos >= count( $this->smallArray ) && $this->rest !== null ) {
			$this->loadChunk();
		}
	}

	#[\ReturnTypeWillChange]
	function valid() {
		return $this->chunkPos < count( $this->smallArray );
	}
}