From: Platonides
Date: Tue, 26 Jan 2010 18:58:07 +0000 (+0000)
Subject: Step 4: Profit!!
X-Git-Tag: 1.31.0-rc.0~38098
X-Git-Url: http://git.cyclocoop.org/%24self?a=commitdiff_plain;h=11f8b8390cec0b0331f431b8a11721436a6035f8;p=lhc%2Fweb%2Fwiklou.git

Step 4: Profit!!
Add and use PregSplitIterator instead of a direct preg_split.
Slower, but with an upper bound on memory usage.
---

diff --git a/includes/StringUtils.php b/includes/StringUtils.php
index c437b3c19e..bab9be4bbe 100644
--- a/includes/StringUtils.php
+++ b/includes/StringUtils.php
@@ -179,6 +179,22 @@ class StringUtils {
 			return new ArrayIterator( explode( $separator, $subject ) );
 		}
 	}
+
+	/**
+	 * Workalike for preg_split() with limited memory usage.
+	 *
+	 * The subject is split in groups of PregSplitIterator::MAX_LIMIT pieces
+	 * while the returned iterator is traversed, instead of all at once.
+	 *
+	 * @param $pattern String: regular expression to split on
+	 * @param $subject String: text to split
+	 * @param $limit Integer: maximum number of pieces; -1 or 0 means no limit
+	 * @param $flags Integer: PREG_SPLIT_* flags, except PREG_SPLIT_OFFSET_CAPTURE
+	 * @return PregSplitIterator
+	 */
+	static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
+		return new PregSplitIterator( $pattern, $subject, $limit, $flags );
+	}
 }
 
 /**
@@ -409,3 +425,124 @@ class ExplodeIterator implements Iterator {
 	}
 }
 
+
+/**
+ * An iterator which works like:
+ *
+ *   foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
+ *      ...
+ *   }
+ *
+ * Except it doesn't use huge amounts of memory when $limit is -1: the
+ * subject is split in groups of at most MAX_LIMIT pieces, and the unsplit
+ * remainder is only split further once the previous group was consumed.
+ *
+ * Known limitations:
+ *  - The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported.
+ *  - Each remainder is matched on its own, so patterns which can match the
+ *    empty string or depend on the text before the match (lookbehinds, ^,
+ *    \b, \G) may give other results than preg_split().
+ */
+class PregSplitIterator implements Iterator {
+	// The arguments given to the constructor.
+	var $pattern, $subject, $originalLimit, $flags;
+
+	// The last extracted group of items, without the unsplit remainder.
+	var $smallArray = array();
+
+	// The position on the iterator.
+	var $curPos = 0;
+
+	// Number of pieces which may still be returned, -1 for no limit.
+	var $limit = -1;
+
+	// Index of the current item inside $smallArray.
+	private $groupPos = 0;
+
+	// Part of the subject which wasn't split yet, null once nothing is left.
+	private $rest = null;
+
+	// Number of pieces extracted by each preg_split() call.
+	const MAX_LIMIT = 100;
+
+	/**
+	 * Construct a PregSplitIterator
+	 *
+	 * @param $pattern String: regular expression to split on
+	 * @param $s String: text to split
+	 * @param $limit Integer: maximum number of pieces; -1 or 0 means no limit
+	 * @param $flags Integer: PREG_SPLIT_* flags
+	 */
+	function __construct( $pattern, $s, $limit, $flags ) {
+		$this->pattern = $pattern;
+		$this->subject = $s;
+		$this->originalLimit = $limit;
+		$this->flags = $flags;
+
+		$this->rewind();
+	}
+
+	/**
+	 * Split the next group of items off the unsplit remainder.
+	 * Leaves $smallArray empty when there is nothing left to return.
+	 */
+	private function nextGroup() {
+		$this->groupPos = 0;
+		$this->smallArray = array();
+		if ( $this->rest === null ) {
+			return;
+		}
+
+		# The extra piece asked for is the remainder for the following group.
+		$isLast = $this->limit != -1 && $this->limit <= self::MAX_LIMIT + 1;
+		$groupLimit = $isLast ? $this->limit : self::MAX_LIMIT + 1;
+
+		$items = preg_split( $this->pattern, $this->rest, $groupLimit, $this->flags );
+		if ( $items === false ) {
+			# preg_split() failed, there's nothing to iterate.
+			$this->rest = null;
+			return;
+		}
+
+		if ( $isLast || count( $items ) < 2 ) {
+			# The limit or the end of the subject was reached.
+			$this->rest = null;
+		} else {
+			# With PREG_SPLIT_DELIM_CAPTURE there are more than MAX_LIMIT + 1 items,
+			# so the remainder is taken from the end, not from a fixed index.
+			$this->rest = array_pop( $items );
+			if ( $this->limit != -1 ) {
+				$this->limit -= self::MAX_LIMIT;
+			}
+		}
+		$this->smallArray = $items;
+	}
+
+	function rewind() {
+		$this->curPos = 0;
+		$this->rest = strval( $this->subject );
+		# Like in preg_split(), -1 and 0 both mean "no limit".
+		$this->limit = $this->originalLimit > 0 ? intval( $this->originalLimit ) : -1;
+		$this->nextGroup();
+	}
+
+	function current() {
+		return $this->valid() ? $this->smallArray[$this->groupPos] : null;
+	}
+
+	function key() {
+		return $this->curPos;
+	}
+
+	function next() {
+		$this->curPos++;
+		$this->groupPos++;
+		if ( $this->groupPos >= count( $this->smallArray ) ) {
+			$this->nextGroup();
+		}
+	}
+
+	function valid() {
+		return $this->groupPos < count( $this->smallArray );
+	}
+}
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index e419a30df4..50e15dc4f8 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1154,7 +1154,7 @@ class Parser
 		# be text, and the remaining three constitute mark-up for bold text.
 		# If there are more than 6 apostrophes in a row, assume they're all
 		# text except for the last 6.
-		$arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+		$arr = StringUtils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 
 
 		# Now let's actually convert our apostrophic mush to HTML!
diff --git a/tests/preg_split_test.php b/tests/preg_split_test.php new file mode 100644 index 0000000000..69c977f774 --- /dev/null +++ b/tests/preg_split_test.php @@ -0,0 +1,24 @@ +