Merge "(bug 32348) Allow descending order for list=alllinks"

[lhc/web/wiklou.git] / includes / StringUtils.php
diff --git a/includes/StringUtils.php b/includes/StringUtils.php

index 1e34890..3b500ae 100644 (file)
--- a/includes/StringUtils.php
+++ b/includes/StringUtils.php
@@ -1,4 +1,25 @@
  <?php
+/**
+ * Methods to play with strings.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
  /**
   * A collection of static methods to play with strings.
   */
@@ -13,6 +34,13 @@ class StringUtils {
          * Compared to delimiterReplace(), this implementation is fast but memory-
          * hungry and inflexible. The memory requirements are such that I don't
          * recommend using it on anything but guaranteed small chunks of text.
+        *
+        * @param $startDelim
+        * @param $endDelim
+        * @param $replace
+        * @param $subject
+        *
+        * @return string
          */
         static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
                 $segments = explode( $startDelim, $subject );
@@ -36,13 +64,20 @@ class StringUtils {
          * This implementation is slower than hungryDelimiterReplace but uses far less
          * memory. The delimiters are literal strings, not regular expressions.
          *
-        * @param string $flags Regular expression flags
+        * If the start delimiter ends with an initial substring of the end delimiter,
+        * e.g. in the case of C-style comments, the behaviour differs from the model
+        * regex. In this implementation, the end must share no characters with the
+        * start, so e.g. /*\/ is not considered to be both the start and end of a
+        * comment. /*\/xy/*\/ is considered to be a single comment with contents /xy/.
+        *
+        * @param $startDelim String: start delimiter
+        * @param $endDelim String: end delimiter
+        * @param $callback Callback: function to call on each match
+        * @param $subject String
+        * @param $flags String: regular expression flags
+        * @throws MWException
+        * @return string
          */
-       # If the start delimiter ends with an initial substring of the end delimiter,
-       # e.g. in the case of C-style comments, the behaviour differs from the model
-       # regex. In this implementation, the end must share no characters with the
-       # start, so e.g. /*/ is not considered to be both the start and end of a
-       # comment. /*/xy/*/ is considered to be a single comment with contents /xy/.
         static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
                 $inputPos = 0;
                 $outputPos = 0;
@@ -77,16 +112,20 @@ class StringUtils {
                         }
  
                         if ( $tokenType == 'start' ) {
-                               $inputPos = $tokenOffset + $tokenLength;
                                 # Only move the start position if we haven't already found a start
                                 # This means that START START END matches outer pair
                                 if ( !$foundStart ) {
                                         # Found start
+                                       $inputPos = $tokenOffset + $tokenLength;
                                         # Write out the non-matching section
                                         $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
                                         $outputPos = $tokenOffset;
                                         $contentPos = $inputPos;
                                         $foundStart = true;
+                               } else {
+                                       # Move the input position past the *first character* of START,
+                                       # to protect against missing END when it overlaps with START
+                                       $inputPos = $tokenOffset + 1;
                                 }
                         } elseif ( $tokenType == 'end' ) {
                                 if ( $foundStart ) {
@@ -111,17 +150,18 @@ class StringUtils {
                 return $output;
         }
  
-       /*
+       /**
          * Perform an operation equivalent to
          *
          *   preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject )
          *
-        * @param string $startDelim Start delimiter regular expression
-        * @param string $endDelim End delimiter regular expression
-        * @param string $replace Replacement string. May contain $1, which will be
-        *               replaced by the text between the delimiters
-        * @param string $subject String to search
-        * @return string The string with the matches replaced
+        * @param $startDelim String: start delimiter regular expression
+        * @param $endDelim String: end delimiter regular expression
+        * @param $replace String: replacement string. May contain $1, which will be
+        *                 replaced by the text between the delimiters
+        * @param $subject String: string to search
+        * @param $flags String: regular expression flags
+        * @return String: The string with the matches replaced
          */
         static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
                 $replacer = new RegexlikeReplacer( $replace );
@@ -132,8 +172,8 @@ class StringUtils {
         /**
          * More or less "markup-safe" explode()
          * Ignores any instances of the separator inside <...>
-        * @param string $separator
-        * @param string $text
+        * @param $separator String
+        * @param $text String
          * @return array
          */
         static function explodeMarkup( $separator, $text ) {
@@ -159,8 +199,8 @@ class StringUtils {
          * Escape a string to make it suitable for inclusion in a preg_replace()
          * replacement parameter.
          *
-        * @param string $string
-        * @return string
+        * @param $string String
+        * @return String
          */
         static function escapeRegexReplacement( $string ) {
                 $string = str_replace( '\\', '\\\\', $string );
@@ -171,6 +211,9 @@ class StringUtils {
         /**
          * Workalike for explode() with limited memory usage.
          * Returns an Iterator
+        * @param $separator
+        * @param $subject
+        * @return ArrayIterator|\ExplodeIterator
          */
         static function explode( $separator, $subject ) {
                 if ( substr_count( $subject, $separator ) > 1000 ) {
@@ -179,86 +222,6 @@ class StringUtils {
                         return new ArrayIterator( explode( $separator, $subject ) );
                 }
         }
-
-       /**
-        * Clean characters that are invalid in the given character set 
-        * from a given string.
-        * 
-        * @param $string \type{$string} String to clean
-        * @param $charset \type{$string} Character set (if unspecified, assume $wgOutputEncoding)
-        * @return \type{$string} Cleaned string
-        */
-       public static function cleanForCharset( $string, $charset='' ) {
-               global $wgOutputEncoding;
-               switch ( $charset ? $charset : $wgOutputEncoding ) {
-                       # UTF-8 should be all we need to worry about. :)
-               case 'UTF-8':
-                       return self::cleanUtf8( $string );
-               default:
-                       return $string;
-               }
-       }
-
-       /**
-        * Clean invalid UTF-8 characters and sequences from a given string,
-        * replacing them with U+FFFD.
-        * Should be RFC 3629 compliant.
-        * 
-        * @param $string \type{$string} String to clean
-        * @return \type{$string} Cleaned string
-        */
-       private static function cleanUtf8( $str ) {
-               # HERE BE DRAGONS!
-               # ABANDON ALL HOPE, ALL YE WHO ENTER THE BITWISE HELLFIRE.
-
-               $illegal = array( 0xD800, 0xDB7F, 0xDB80, 0xDBFF,
-                                 0xDC00, 0xDF80, 0xDFFF, 0xFFFE, 0xFFFF );
-               $len = strlen( $str );
-               $left = $bytes = 0;
-               for ( $i = 0; $i < $len; $i++ ) {
-                       $ch = ord( $str[$i] );
-                       if ( !$left ) {
-                               if ( !($ch & 0x80 ) )
-                                       continue;
-                               $left = (( $ch & 0xFE ) == 0xFC ? 5 :
-                                       (( $ch & 0xFC ) == 0xF8 ? 4 :
-                                       (( $ch & 0xF8 ) == 0xF0 ? 3 :
-                                       (( $ch & 0xF0 ) == 0xE0 ? 2 :
-                                       (( $ch & 0xE0 ) == 0xC0 ? 1 :
-                                                                 0 )))));
-                               if ( $left ) {
-                                       $bytes = $left + 1;
-                                       $sum = $ch & ( 0xFF >> $bytes + 1 );
-                                       continue;
-                               } else if ( $ch & 0x80 ) {
-                                       $bytes = 1;
-                               }
-                       } else if ( ( $ch & 0xC0 ) == 0x80 ) {
-                               $sum <<= 6;
-                               $sum += $ch & 0x3F;
-                               if ( --$left ) continue;
-                               if ( ( $bytes == 2 && $sum < 0x80     ) ||
-                                    ( $bytes == 3 && $sum < 0x800    ) ||
-                                    ( $bytes == 4 && $sum < 0x10000  ) ||
-                                    ( $bytes >  4 || $sum > 0x10FFFF ) ||
-                                    in_array( $sum, $illegal ) ) {
-                               } else continue;
-                               
-                       } else {
-                               $bytes -= $left;
-                               $i--;
-                       }
-
-                       $str = ( substr( $str, 0, $i - $bytes + 1 ) .
-                                "\xEF\xBF\xBD" .
-                                substr( $str, $i + 1 ) );
-                       $i   += 3 - $bytes;
-                       $len += 3 - $bytes;
-                       $left = 0;
-               }
-
-               return $str;
-       }
  }
  
  /**
@@ -266,6 +229,10 @@ class StringUtils {
   * StringUtils::delimiterReplaceCallback()
   */
  class Replacer {
+
+       /**
+        * @return array
+        */
         function cb() {
                 return array( &$this, 'replace' );
         }
@@ -276,10 +243,18 @@ class Replacer {
   */
  class RegexlikeReplacer extends Replacer {
         var $r;
+
+       /**
+        * @param $r string
+        */
         function __construct( $r ) {
                 $this->r = $r;
         }
  
+       /**
+        * @param $matches array
+        * @return string
+        */
         function replace( $matches ) {
                 $pairs = array();
                 foreach ( $matches as $i => $match ) {
@@ -294,12 +269,22 @@ class RegexlikeReplacer extends Replacer {
   * Class to perform secondary replacement within each replacement string
   */
  class DoubleReplacer extends Replacer {
+
+       /**
+        * @param $from
+        * @param $to
+        * @param $index int
+        */
         function __construct( $from, $to, $index = 0 ) {
                 $this->from = $from;
                 $this->to = $to;
                 $this->index = $index;
         }
  
+       /**
+        * @param $matches array
+        * @return mixed
+        */
         function replace( $matches ) {
                 return str_replace( $this->from, $this->to, $matches[$this->index] );
         }
@@ -311,11 +296,19 @@ class DoubleReplacer extends Replacer {
  class HashtableReplacer extends Replacer {
         var $table, $index;
  
+       /**
+        * @param $table
+        * @param $index int
+        */
         function __construct( $table, $index = 0 ) {
                 $this->table = $table;
                 $this->index = $index;
         }
  
+       /**
+        * @param $matches array
+        * @return mixed
+        */
         function replace( $matches ) {
                 return $this->table[$matches[$this->index]];
         }
@@ -332,11 +325,15 @@ class ReplacementArray {
         /**
          * Create an object with the specified replacement array
          * The array should have the same form as the replacement array for strtr()
+        * @param array $data
          */
         function __construct( $data = array() ) {
                 $this->data = $data;
         }
  
+       /**
+        * @return array
+        */
         function __sleep() {
                 return array( 'data' );
         }
@@ -353,39 +350,61 @@ class ReplacementArray {
                 $this->fss = false;
         }
  
+       /**
+        * @return array|bool
+        */
         function getArray() {
                 return $this->data;
         }
  
         /**
          * Set an element of the replacement array
+        * @param $from string
+        * @param $to string
          */
         function setPair( $from, $to ) {
                 $this->data[$from] = $to;
                 $this->fss = false;
         }
  
+       /**
+        * @param $data array
+        */
         function mergeArray( $data ) {
                 $this->data = array_merge( $this->data, $data );
                 $this->fss = false;
         }
  
+       /**
+        * @param $other
+        */
         function merge( $other ) {
                 $this->data = array_merge( $this->data, $other->data );
                 $this->fss = false;
         }
  
+       /**
+        * @param $from string
+        */
         function removePair( $from ) {
                 unset($this->data[$from]);
                 $this->fss = false;
         }
  
+       /**
+        * @param $data array
+        */
         function removeArray( $data ) {
-               foreach( $data as $from => $to )
+               foreach( $data as $from => $to ) {
                         $this->removePair( $from );
+               }
                 $this->fss = false;
         }
  
+       /**
+        * @param $subject string
+        * @return string
+        */
         function replace( $subject ) {
                 if ( function_exists( 'fss_prep_replace' ) ) {
                         wfProfileIn( __METHOD__.'-fss' );
@@ -428,8 +447,10 @@ class ExplodeIterator implements Iterator {
         // The current token
         var $current;
  
-       /** 
+       /**
          * Construct a DelimIterator
+        * @param $delim string
+        * @param $s string
          */
         function __construct( $delim, $s ) {
                 $this->subject = $s;
@@ -448,7 +469,6 @@ class ExplodeIterator implements Iterator {
                 $this->refreshCurrent();
         }
  
-
         function refreshCurrent() {
                 if ( $this->curPos === false ) {
                         $this->current = false;
@@ -469,6 +489,9 @@ class ExplodeIterator implements Iterator {
                 return $this->curPos;
         }
  
+       /**
+        * @return string
+        */
         function next() {
                 if ( $this->endPos === false ) {
                         $this->curPos = false;
@@ -484,8 +507,10 @@ class ExplodeIterator implements Iterator {
                 return $this->current;
         }
  
+       /**
+        * @return bool
+        */
         function valid() {
                 return $this->curPos !== false;
         }
  }
-