Split CLDRPluralRuleEvaluator file
authoraddshore <addshorewiki@gmail.com>
Tue, 8 Apr 2014 22:50:31 +0000 (23:50 +0100)
committeraddshore <addshorewiki@gmail.com>
Tue, 8 Apr 2014 22:50:31 +0000 (23:50 +0100)
Change-Id: Iec9a939e3f28a600c841b89a2849f65d41763a62

includes/AutoLoader.php
languages/utils/CLDRPluralRuleConverter.php [new file with mode: 0644]
languages/utils/CLDRPluralRuleConverter_Expression.php [new file with mode: 0644]
languages/utils/CLDRPluralRuleConverter_Fragment.php [new file with mode: 0644]
languages/utils/CLDRPluralRuleConverter_Operator.php [new file with mode: 0644]
languages/utils/CLDRPluralRuleError.php [new file with mode: 0644]
languages/utils/CLDRPluralRuleEvaluator.php
languages/utils/CLDRPluralRuleEvaluator_Range.php [new file with mode: 0644]

index 52b292f..4a4cd59 100644 (file)
@@ -1125,13 +1125,13 @@ $wgAutoloadLocalClasses = array(
        'FakeConverter' => 'languages/Language.php',
        'Language' => 'languages/Language.php',
        'LanguageConverter' => 'languages/LanguageConverter.php',
-       'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleEvaluator.php',
-       'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleEvaluator.php',
-       'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleEvaluator.php',
-       'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleEvaluator.php',
+       'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleConverter.php',
+       'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleConverter_Expression.php',
+       'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleConverter_Fragment.php',
+       'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleConverter_Operator.php',
        'CLDRPluralRuleEvaluator' => 'languages/utils/CLDRPluralRuleEvaluator.php',
-       'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator.php',
-       'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleEvaluator.php',
+       'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator_Range.php',
+       'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleError.php',
 
        # maintenance
        'BackupDumper' => 'maintenance/backup.inc',
diff --git a/languages/utils/CLDRPluralRuleConverter.php b/languages/utils/CLDRPluralRuleConverter.php
new file mode 100644 (file)
index 0000000..65d57e0
--- /dev/null
@@ -0,0 +1,313 @@
+<?php
+
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper class for converting rules to reverse polish notation (RPN).
+ */
+class CLDRPluralRuleConverter {
+       /**
+        * The input string
+        *
+        * @var string
+        */
+       public $rule;
+
+       /**
+        * The current position
+        *
+        * @var int
+        */
+       public $pos;
+
+       /**
+        * The past-the-end position
+        *
+        * @var int
+        */
+       public $end;
+
+       /**
+        * The operator stack
+        *
+        * @var array
+        */
+       public $operators = array();
+
+       /**
+        * The operand stack
+        *
+        * @var array
+        */
+       public $operands = array();
+
+       /**
+        * Precedence levels. Note that there's no need to worry about associativity
+        * for the level 4 operators, since they return boolean and don't accept
+        * boolean inputs.
+        */
+       static $precedence = array(
+               'or' => 2,
+               'and' => 3,
+               'is' => 4,
+               'is-not' => 4,
+               'in' => 4,
+               'not-in' => 4,
+               'within' => 4,
+               'not-within' => 4,
+               'mod' => 5,
+               ',' => 6,
+               '..' => 7,
+       );
+
+       /**
+        * A character list defining whitespace, for use in strspn() etc.
+        */
+       const WHITESPACE_CLASS = " \t\r\n";
+
+       /**
+        * Same for digits. Note that the grammar given in UTS #35 doesn't allow
+        * negative numbers or decimal separators.
+        */
+       const NUMBER_CLASS = '0123456789';
+
+       /**
+        * A character list of symbolic operands.
+        */
+       const OPERAND_SYMBOLS = 'nivwft';
+
+       /**
+        * An anchored regular expression which matches a word at the current offset.
+        */
+       const WORD_REGEX = '/[a-zA-Z@]+/A';
+
+       /**
+        * Convert a rule to RPN. This is the only public entry point.
+        *
+        * @param string $rule The rule to convert
+        * @return string The RPN representation of the rule
+        */
+       public static function convert( $rule ) {
+               $parser = new self( $rule );
+               return $parser->doConvert();
+       }
+
+       /**
+        * Private constructor.
+        */
+       protected function __construct( $rule ) {
+               $this->rule = $rule;
+               $this->pos = 0;
+               $this->end = strlen( $rule );
+       }
+
+       /**
+        * Do the operation.
+        *
+        * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
+        */
+       protected function doConvert() {
+               $expectOperator = true;
+
+               // Iterate through all tokens, saving the operators and operands to a
+               // stack per Dijkstra's shunting yard algorithm.
+               /** @var CLDRPluralRuleConverter_Operator $token */
+               while ( false !== ( $token = $this->nextToken() ) ) {
+                       // In this grammar, there are only binary operators, so every valid
+                       // rule string will alternate between operator and operand tokens.
+                       $expectOperator = !$expectOperator;
+
+                       if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
+                               // Operand
+                               if ( $expectOperator ) {
+                                       $token->error( 'unexpected operand' );
+                               }
+                               $this->operands[] = $token;
+                               continue;
+                       } else {
+                               // Operator
+                               if ( !$expectOperator ) {
+                                       $token->error( 'unexpected operator' );
+                               }
+                               // Resolve higher precedence levels
+                               $lastOp = end( $this->operators );
+                               while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
+                                       $this->doOperation( $lastOp, $this->operands );
+                                       array_pop( $this->operators );
+                                       $lastOp = end( $this->operators );
+                               }
+                               $this->operators[] = $token;
+                       }
+               }
+
+               // Finish off the stack
+               while ( $op = array_pop( $this->operators ) ) {
+                       $this->doOperation( $op, $this->operands );
+               }
+
+               // Make sure the result is sane. The first case is possible for an empty
+               // string input, the second should be unreachable.
+               if ( !count( $this->operands ) ) {
+                       $this->error( 'condition expected' );
+               } elseif ( count( $this->operands ) > 1 ) {
+                       $this->error( 'missing operator or too many operands' );
+               }
+
+               $value = $this->operands[0];
+               if ( $value->type !== 'boolean' ) {
+                       $this->error( 'the result must have a boolean type' );
+               }
+
+               return $this->operands[0]->rpn;
+       }
+
+       /**
+        * Fetch the next token from the input string.
+        *
+        * @return CLDRPluralRuleConverter_Fragment The next token
+        */
+       protected function nextToken() {
+               if ( $this->pos >= $this->end ) {
+                       return false;
+               }
+
+               // Whitespace
+               $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
+               $this->pos += $length;
+
+               if ( $this->pos >= $this->end ) {
+                       return false;
+               }
+
+               // Number
+               $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
+               if ( $length !== 0 ) {
+                       $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
+                       $this->pos += $length;
+                       return $token;
+               }
+
+               // Two-character operators
+               $op2 = substr( $this->rule, $this->pos, 2 );
+               if ( $op2 === '..' || $op2 === '!=' ) {
+                       $token = $this->newOperator( $op2, $this->pos, 2 );
+                       $this->pos += 2;
+                       return $token;
+               }
+
+               // Single-character operators
+               $op1 = $this->rule[$this->pos];
+               if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
+                       $token = $this->newOperator( $op1, $this->pos, 1 );
+                       $this->pos ++;
+                       return $token;
+               }
+
+               // Word
+               if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
+                       $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
+               }
+               $word1 = strtolower( $m[0] );
+               $word2 = '';
+               $nextTokenPos = $this->pos + strlen( $word1 );
+               if ( $word1 === 'not' || $word1 === 'is' ) {
+                       // Look ahead one word
+                       $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
+                       if ( $nextTokenPos < $this->end
+                               && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
+                       ) {
+                               $word2 = strtolower( $m[0] );
+                               $nextTokenPos += strlen( $word2 );
+                       }
+               }
+
+               // Two-word operators like "is not" take precedence over single-word operators like "is"
+               if ( $word2 !== '' ) {
+                       $bothWords = "{$word1}-{$word2}";
+                       if ( isset( self::$precedence[$bothWords] ) ) {
+                               $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
+                               $this->pos = $nextTokenPos;
+                               return $token;
+                       }
+               }
+
+               // Single-word operators
+               if ( isset( self::$precedence[$word1] ) ) {
+                       $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
+                       $this->pos += strlen( $word1 );
+                       return $token;
+               }
+
+               // The single-character operand symbols
+               if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
+                       $token = $this->newNumber( $word1, $this->pos );
+                       $this->pos ++;
+                       return $token;
+               }
+
+               // Samples
+               if ( $word1 === '@integer' || $word1 === '@decimal' ) {
+                       // Samples are like comments, they have no effect on rule evaluation.
+                       // They run from the first sample indicator to the end of the string.
+                       $this->pos = $this->end;
+                       return false;
+               }
+
+               $this->error( 'unrecognised word' );
+       }
+
+       /**
+        * For the binary operator $op, pop its operands off the stack and push
+        * a fragment with rpn and type members describing the result of that
+        * operation.
+        *
+        * @param CLDRPluralRuleConverter_Operator $op
+        */
+       protected function doOperation( $op ) {
+               if ( count( $this->operands ) < 2 ) {
+                       $op->error( 'missing operand' );
+               }
+               $right = array_pop( $this->operands );
+               $left = array_pop( $this->operands );
+               $result = $op->operate( $left, $right );
+               $this->operands[] = $result;
+       }
+
+       /**
+        * Create a numerical expression object
+        *
+        * @param string $text
+        * @param int $pos
+        * @return CLDRPluralRuleConverter_Expression The numerical expression
+        */
+       protected function newNumber( $text, $pos ) {
+               return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
+       }
+
+       /**
+        * Create a binary operator
+        *
+        * @param string $type
+        * @param int $pos
+        * @param int $length
+        * @return CLDRPluralRuleConverter_Operator The operator
+        */
+       protected function newOperator( $type, $pos, $length ) {
+               return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
+       }
+
+       /**
+        * Throw an error
+        */
+       protected function error( $message ) {
+               throw new CLDRPluralRuleError( $message );
+       }
+}
diff --git a/languages/utils/CLDRPluralRuleConverter_Expression.php b/languages/utils/CLDRPluralRuleConverter_Expression.php
new file mode 100644 (file)
index 0000000..8352e72
--- /dev/null
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * An expression object, representing a region of the input string (for error
+ * messages), the RPN notation used to evaluate it, and the result type for
+ * validation.
+ */
+class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
+       /** @var string */
+       public $type;
+
+       /** @var string */
+       public $rpn;
+
+       function __construct( $parser, $type, $rpn, $pos, $length ) {
+               parent::__construct( $parser, $pos, $length );
+               $this->type = $type;
+               $this->rpn = $rpn;
+       }
+
+       public function isType( $type ) {
+               if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
+                       return true;
+               }
+               if ( $type === $this->type ) {
+                       return true;
+               }
+               return false;
+       }
+}
diff --git a/languages/utils/CLDRPluralRuleConverter_Fragment.php b/languages/utils/CLDRPluralRuleConverter_Fragment.php
new file mode 100644 (file)
index 0000000..88795a0
--- /dev/null
@@ -0,0 +1,35 @@
+<?php
+
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * The base class for operators and expressions, describing a region of the input string.
+ */
+class CLDRPluralRuleConverter_Fragment {
+       public $parser, $pos, $length, $end;
+
+       function __construct( $parser, $pos, $length ) {
+               $this->parser = $parser;
+               $this->pos = $pos;
+               $this->length = $length;
+               $this->end = $pos + $length;
+       }
+
+       public function error( $message ) {
+               $text = $this->getText();
+               throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
+       }
+
+       public function getText() {
+               return substr( $this->parser->rule, $this->pos, $this->length );
+       }
+}
diff --git a/languages/utils/CLDRPluralRuleConverter_Operator.php b/languages/utils/CLDRPluralRuleConverter_Operator.php
new file mode 100644 (file)
index 0000000..c42953c
--- /dev/null
@@ -0,0 +1,114 @@
+<?php
+
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Helper for CLDRPluralRuleConverter.
+ * An operator object, representing a region of the input string (for error
+ * messages), and the binary operator at that location.
+ */
+class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
+       /** @var string The name */
+       public $name;
+
+       /**
+        * Each op type has three characters: left operand type, right operand type and result type
+        *
+        *   b = boolean
+        *   n = number
+        *   r = range
+        *
+        * A number is a kind of range.
+        *
+        * @var array
+        */
+       static $opTypes = array(
+               'or' => 'bbb',
+               'and' => 'bbb',
+               'is' => 'nnb',
+               'is-not' => 'nnb',
+               'in' => 'nrb',
+               'not-in' => 'nrb',
+               'within' => 'nrb',
+               'not-within' => 'nrb',
+               'mod' => 'nnn',
+               ',' => 'rrr',
+               '..' => 'nnr',
+       );
+
+       /**
+        * Map converting from the abbrevation to the full form.
+        *
+        * @var array
+        */
+       static $typeSpecMap = array(
+               'b' => 'boolean',
+               'n' => 'number',
+               'r' => 'range',
+       );
+
+       /**
+        * Map for converting the new operators introduced in Rev 33 to the old forms
+        */
+       static $aliasMap = array(
+               '%' => 'mod',
+               '!=' => 'not-in',
+               '=' => 'in'
+       );
+
+       /**
+        * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
+        *
+        * @param CLDRPluralRuleConverter $parser The parser
+        * @param string $name The operator name
+        * @param int $pos The length
+        * @param int $length
+        */
+       function __construct( $parser, $name, $pos, $length ) {
+               parent::__construct( $parser, $pos, $length );
+               if ( isset( self::$aliasMap[$name] ) ) {
+                       $name = self::$aliasMap[$name];
+               }
+               $this->name = $name;
+       }
+
+       /**
+        * Compute the operation
+        *
+        * @param CLDRPluralRuleConverter_Expression $left The left part of the expression
+        * @param CLDRPluralRuleConverter_Expression $right The right part of the expression
+        * @return CLDRPluralRuleConverter_Expression The result of the operation
+        */
+       public function operate( $left, $right ) {
+               $typeSpec = self::$opTypes[$this->name];
+
+               $leftType = self::$typeSpecMap[$typeSpec[0]];
+               $rightType = self::$typeSpecMap[$typeSpec[1]];
+               $resultType = self::$typeSpecMap[$typeSpec[2]];
+
+               $start = min( $this->pos, $left->pos, $right->pos );
+               $end = max( $this->end, $left->end, $right->end );
+               $length = $end - $start;
+
+               $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
+                       "{$left->rpn} {$right->rpn} {$this->name}",
+                       $start, $length );
+
+               if ( !$left->isType( $leftType ) ) {
+                       $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
+               }
+
+               if ( !$right->isType( $rightType ) ) {
+                       $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
+               }
+               return $newExpr;
+       }
+}
diff --git a/languages/utils/CLDRPluralRuleError.php b/languages/utils/CLDRPluralRuleError.php
new file mode 100644 (file)
index 0000000..2ca3410
--- /dev/null
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * The exception class for all the classes in this file. This will be thrown
+ * back to the caller if there is any validation error.
+ */
+class CLDRPluralRuleError extends MWException {
+       function __construct( $message ) {
+               parent::__construct( 'CLDR plural rule error: ' . $message );
+       }
+}
\ No newline at end of file
index e420cb2..61ab947 100644 (file)
@@ -30,6 +30,7 @@
  * @file
  * @since 1.20
  */
+
 class CLDRPluralRuleEvaluator {
        /**
         * Evaluate a number against a set of plural rules. If a rule passes,
@@ -179,570 +180,3 @@ class CLDRPluralRuleEvaluator {
                }
        }
 }
-
-/**
- * Evaluator helper class representing a range list.
- */
-class CLDRPluralRuleEvaluator_Range {
-       /**
-        * The parts
-        *
-        * @var array
-        */
-       public $parts = array();
-
-       /**
-        * Initialize a new instance of CLDRPluralRuleEvaluator_Range
-        *
-        * @param int $start The start of the range
-        * @param int|bool $end The end of the range, or false if the range is not bounded.
-        */
-       function __construct( $start, $end = false ) {
-               if ( $end === false ) {
-                       $this->parts[] = $start;
-               } else {
-                       $this->parts[] = array( $start, $end );
-               }
-       }
-
-       /**
-        * Determine if the given number is inside the range.
-        *
-        * @param int $number The number to check
-        * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range.
-        * @return bool True if the number is inside the range; otherwise, false.
-        */
-       function isNumberIn( $number, $integerConstraint = true ) {
-               foreach ( $this->parts as $part ) {
-                       if ( is_array( $part ) ) {
-                               if ( ( !$integerConstraint || floor( $number ) === (float)$number )
-                                       && $number >= $part[0] && $number <= $part[1]
-                               ) {
-                                       return true;
-                               }
-                       } else {
-                               if ( $number == $part ) {
-                                       return true;
-                               }
-                       }
-               }
-               return false;
-       }
-
-       /**
-        * Readable alias for isNumberIn( $number, false ), and the implementation
-        * of the "within" operator.
-        *
-        * @param int $number The number to check
-        * @return bool True if the number is inside the range; otherwise, false.
-        */
-       function isNumberWithin( $number ) {
-               return $this->isNumberIn( $number, false );
-       }
-
-       /**
-        * Add another part to this range.
-        *
-        * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either
-        *   a range object itself or a single number.
-        */
-       function add( $other ) {
-               if ( $other instanceof self ) {
-                       $this->parts = array_merge( $this->parts, $other->parts );
-               } else {
-                       $this->parts[] = $other;
-               }
-       }
-
-       /**
-        * Returns the string representation of the rule evaluator range.
-        * The purpose of this method is to help debugging.
-        *
-        * @return string The string representation of the rule evaluator range
-        */
-       function __toString() {
-               $s = 'Range(';
-               foreach ( $this->parts as $i => $part ) {
-                       if ( $i ) {
-                               $s .= ', ';
-                       }
-                       if ( is_array( $part ) ) {
-                               $s .= $part[0] . '..' . $part[1];
-                       } else {
-                               $s .= $part;
-                       }
-               }
-               $s .= ')';
-               return $s;
-       }
-
-}
-
-/**
- * Helper class for converting rules to reverse polish notation (RPN).
- */
-class CLDRPluralRuleConverter {
-       /**
-        * The input string
-        *
-        * @var string
-        */
-       public $rule;
-
-       /**
-        * The current position
-        *
-        * @var int
-        */
-       public $pos;
-
-       /**
-        * The past-the-end position
-        *
-        * @var int
-        */
-       public $end;
-
-       /**
-        * The operator stack
-        *
-        * @var array
-        */
-       public $operators = array();
-
-       /**
-        * The operand stack
-        *
-        * @var array
-        */
-       public $operands = array();
-
-       /**
-        * Precedence levels. Note that there's no need to worry about associativity
-        * for the level 4 operators, since they return boolean and don't accept
-        * boolean inputs.
-        */
-       static $precedence = array(
-               'or' => 2,
-               'and' => 3,
-               'is' => 4,
-               'is-not' => 4,
-               'in' => 4,
-               'not-in' => 4,
-               'within' => 4,
-               'not-within' => 4,
-               'mod' => 5,
-               ',' => 6,
-               '..' => 7,
-       );
-
-       /**
-        * A character list defining whitespace, for use in strspn() etc.
-        */
-       const WHITESPACE_CLASS = " \t\r\n";
-
-       /**
-        * Same for digits. Note that the grammar given in UTS #35 doesn't allow
-        * negative numbers or decimal separators.
-        */
-       const NUMBER_CLASS = '0123456789';
-
-       /**
-        * A character list of symbolic operands.
-        */
-       const OPERAND_SYMBOLS = 'nivwft';
-
-       /**
-        * An anchored regular expression which matches a word at the current offset.
-        */
-       const WORD_REGEX = '/[a-zA-Z@]+/A';
-
-       /**
-        * Convert a rule to RPN. This is the only public entry point.
-        *
-        * @param string $rule The rule to convert
-        * @return string The RPN representation of the rule
-        */
-       public static function convert( $rule ) {
-               $parser = new self( $rule );
-               return $parser->doConvert();
-       }
-
-       /**
-        * Private constructor.
-        */
-       protected function __construct( $rule ) {
-               $this->rule = $rule;
-               $this->pos = 0;
-               $this->end = strlen( $rule );
-       }
-
-       /**
-        * Do the operation.
-        *
-        * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
-        */
-       protected function doConvert() {
-               $expectOperator = true;
-
-               // Iterate through all tokens, saving the operators and operands to a
-               // stack per Dijkstra's shunting yard algorithm.
-               /** @var CLDRPluralRuleConverter_Operator $token */
-               while ( false !== ( $token = $this->nextToken() ) ) {
-                       // In this grammar, there are only binary operators, so every valid
-                       // rule string will alternate between operator and operand tokens.
-                       $expectOperator = !$expectOperator;
-
-                       if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
-                               // Operand
-                               if ( $expectOperator ) {
-                                       $token->error( 'unexpected operand' );
-                               }
-                               $this->operands[] = $token;
-                               continue;
-                       } else {
-                               // Operator
-                               if ( !$expectOperator ) {
-                                       $token->error( 'unexpected operator' );
-                               }
-                               // Resolve higher precedence levels
-                               $lastOp = end( $this->operators );
-                               while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
-                                       $this->doOperation( $lastOp, $this->operands );
-                                       array_pop( $this->operators );
-                                       $lastOp = end( $this->operators );
-                               }
-                               $this->operators[] = $token;
-                       }
-               }
-
-               // Finish off the stack
-               while ( $op = array_pop( $this->operators ) ) {
-                       $this->doOperation( $op, $this->operands );
-               }
-
-               // Make sure the result is sane. The first case is possible for an empty
-               // string input, the second should be unreachable.
-               if ( !count( $this->operands ) ) {
-                       $this->error( 'condition expected' );
-               } elseif ( count( $this->operands ) > 1 ) {
-                       $this->error( 'missing operator or too many operands' );
-               }
-
-               $value = $this->operands[0];
-               if ( $value->type !== 'boolean' ) {
-                       $this->error( 'the result must have a boolean type' );
-               }
-
-               return $this->operands[0]->rpn;
-       }
-
-       /**
-        * Fetch the next token from the input string.
-        *
-        * @return CLDRPluralRuleConverter_Fragment The next token
-        */
-       protected function nextToken() {
-               if ( $this->pos >= $this->end ) {
-                       return false;
-               }
-
-               // Whitespace
-               $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
-               $this->pos += $length;
-
-               if ( $this->pos >= $this->end ) {
-                       return false;
-               }
-
-               // Number
-               $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
-               if ( $length !== 0 ) {
-                       $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
-                       $this->pos += $length;
-                       return $token;
-               }
-
-               // Two-character operators
-               $op2 = substr( $this->rule, $this->pos, 2 );
-               if ( $op2 === '..' || $op2 === '!=' ) {
-                       $token = $this->newOperator( $op2, $this->pos, 2 );
-                       $this->pos += 2;
-                       return $token;
-               }
-
-               // Single-character operators
-               $op1 = $this->rule[$this->pos];
-               if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
-                       $token = $this->newOperator( $op1, $this->pos, 1 );
-                       $this->pos ++;
-                       return $token;
-               }
-
-               // Word
-               if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
-                       $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
-               }
-               $word1 = strtolower( $m[0] );
-               $word2 = '';
-               $nextTokenPos = $this->pos + strlen( $word1 );
-               if ( $word1 === 'not' || $word1 === 'is' ) {
-                       // Look ahead one word
-                       $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
-                       if ( $nextTokenPos < $this->end
-                               && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
-                       ) {
-                               $word2 = strtolower( $m[0] );
-                               $nextTokenPos += strlen( $word2 );
-                       }
-               }
-
-               // Two-word operators like "is not" take precedence over single-word operators like "is"
-               if ( $word2 !== '' ) {
-                       $bothWords = "{$word1}-{$word2}";
-                       if ( isset( self::$precedence[$bothWords] ) ) {
-                               $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
-                               $this->pos = $nextTokenPos;
-                               return $token;
-                       }
-               }
-
-               // Single-word operators
-               if ( isset( self::$precedence[$word1] ) ) {
-                       $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
-                       $this->pos += strlen( $word1 );
-                       return $token;
-               }
-
-               // The single-character operand symbols
-               if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
-                       $token = $this->newNumber( $word1, $this->pos );
-                       $this->pos ++;
-                       return $token;
-               }
-
-               // Samples
-               if ( $word1 === '@integer' || $word1 === '@decimal' ) {
-                       // Samples are like comments, they have no effect on rule evaluation.
-                       // They run from the first sample indicator to the end of the string.
-                       $this->pos = $this->end;
-                       return false;
-               }
-
-               $this->error( 'unrecognised word' );
-       }
-
-       /**
-        * For the binary operator $op, pop its operands off the stack and push
-        * a fragment with rpn and type members describing the result of that
-        * operation.
-        *
-        * @param CLDRPluralRuleConverter_Operator $op
-        */
-       protected function doOperation( $op ) {
-               if ( count( $this->operands ) < 2 ) {
-                       $op->error( 'missing operand' );
-               }
-               $right = array_pop( $this->operands );
-               $left = array_pop( $this->operands );
-               $result = $op->operate( $left, $right );
-               $this->operands[] = $result;
-       }
-
-       /**
-        * Create a numerical expression object
-        *
-        * @param string $text
-        * @param int $pos
-        * @return CLDRPluralRuleConverter_Expression The numerical expression
-        */
-       protected function newNumber( $text, $pos ) {
-               return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
-       }
-
-       /**
-        * Create a binary operator
-        *
-        * @param string $type
-        * @param int $pos
-        * @param int $length
-        * @return CLDRPluralRuleConverter_Operator The operator
-        */
-       protected function newOperator( $type, $pos, $length ) {
-               return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
-       }
-
-       /**
-        * Throw an error
-        */
-       protected function error( $message ) {
-               throw new CLDRPluralRuleError( $message );
-       }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * The base class for operators and expressions, describing a region of the input string.
- */
-class CLDRPluralRuleConverter_Fragment {
-       public $parser, $pos, $length, $end;
-
-       function __construct( $parser, $pos, $length ) {
-               $this->parser = $parser;
-               $this->pos = $pos;
-               $this->length = $length;
-               $this->end = $pos + $length;
-       }
-
-       public function error( $message ) {
-               $text = $this->getText();
-               throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
-       }
-
-       public function getText() {
-               return substr( $this->parser->rule, $this->pos, $this->length );
-       }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * An expression object, representing a region of the input string (for error
- * messages), the RPN notation used to evaluate it, and the result type for
- * validation.
- */
-class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
-       /** @var string */
-       public $type;
-
-       /** @var string */
-       public $rpn;
-
-       function __construct( $parser, $type, $rpn, $pos, $length ) {
-               parent::__construct( $parser, $pos, $length );
-               $this->type = $type;
-               $this->rpn = $rpn;
-       }
-
-       public function isType( $type ) {
-               if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
-                       return true;
-               }
-               if ( $type === $this->type ) {
-                       return true;
-               }
-               return false;
-       }
-}
-
-/**
- * Helper for CLDRPluralRuleConverter.
- * An operator object, representing a region of the input string (for error
- * messages), and the binary operator at that location.
- */
-class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
-       /** @var string The name */
-       public $name;
-
-       /**
-        * Each op type has three characters: left operand type, right operand type and result type
-        *
-        *   b = boolean
-        *   n = number
-        *   r = range
-        *
-        * A number is a kind of range.
-        *
-        * @var array
-        */
-       static $opTypes = array(
-               'or' => 'bbb',
-               'and' => 'bbb',
-               'is' => 'nnb',
-               'is-not' => 'nnb',
-               'in' => 'nrb',
-               'not-in' => 'nrb',
-               'within' => 'nrb',
-               'not-within' => 'nrb',
-               'mod' => 'nnn',
-               ',' => 'rrr',
-               '..' => 'nnr',
-       );
-
-       /**
-        * Map converting from the abbrevation to the full form.
-        *
-        * @var array
-        */
-       static $typeSpecMap = array(
-               'b' => 'boolean',
-               'n' => 'number',
-               'r' => 'range',
-       );
-
-       /**
-        * Map for converting the new operators introduced in Rev 33 to the old forms
-        */
-       static $aliasMap = array(
-               '%' => 'mod',
-               '!=' => 'not-in',
-               '=' => 'in'
-       );
-
-       /**
-        * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
-        *
-        * @param CLDRPluralRuleConverter $parser The parser
-        * @param string $name The operator name
-        * @param int $pos The length
-        * @param int $length
-        */
-       function __construct( $parser, $name, $pos, $length ) {
-               parent::__construct( $parser, $pos, $length );
-               if ( isset( self::$aliasMap[$name] ) ) {
-                       $name = self::$aliasMap[$name];
-               }
-               $this->name = $name;
-       }
-
-       /**
-        * Compute the operation
-        *
-        * @param CLDRPluralRuleConverter_Expression $left The left part of the expression
-        * @param CLDRPluralRuleConverter_Expression $right The right part of the expression
-        * @return CLDRPluralRuleConverter_Expression The result of the operation
-        */
-       public function operate( $left, $right ) {
-               $typeSpec = self::$opTypes[$this->name];
-
-               $leftType = self::$typeSpecMap[$typeSpec[0]];
-               $rightType = self::$typeSpecMap[$typeSpec[1]];
-               $resultType = self::$typeSpecMap[$typeSpec[2]];
-
-               $start = min( $this->pos, $left->pos, $right->pos );
-               $end = max( $this->end, $left->end, $right->end );
-               $length = $end - $start;
-
-               $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
-                       "{$left->rpn} {$right->rpn} {$this->name}",
-                       $start, $length );
-
-               if ( !$left->isType( $leftType ) ) {
-                       $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
-               }
-
-               if ( !$right->isType( $rightType ) ) {
-                       $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
-               }
-               return $newExpr;
-       }
-}
-
-/**
- * The exception class for all the classes in this file. This will be thrown
- * back to the caller if there is any validation error.
- */
-class CLDRPluralRuleError extends MWException {
-       function __construct( $message ) {
-               parent::__construct( 'CLDR plural rule error: ' . $message );
-       }
-}
diff --git a/languages/utils/CLDRPluralRuleEvaluator_Range.php b/languages/utils/CLDRPluralRuleEvaluator_Range.php
new file mode 100644 (file)
index 0000000..9732b8d
--- /dev/null
@@ -0,0 +1,108 @@
+<?php
+/**
+ * @author Niklas Laxström, Tim Starling
+ *
+ * @copyright Copyright © 2010-2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ *
+ * @file
+ * @since 1.20
+ */
+
+/**
+ * Evaluator helper class representing a range list.
+ */
+class CLDRPluralRuleEvaluator_Range {
+       /**
+        * The parts
+        *
+        * @var array
+        */
+       public $parts = array();
+
+       /**
+        * Initialize a new instance of CLDRPluralRuleEvaluator_Range
+        *
+        * @param int $start The start of the range
+        * @param int|bool $end The end of the range, or false if the range is not bounded.
+        */
+       function __construct( $start, $end = false ) {
+               if ( $end === false ) {
+                       $this->parts[] = $start;
+               } else {
+                       $this->parts[] = array( $start, $end );
+               }
+       }
+
+       /**
+        * Determine if the given number is inside the range.
+        *
+        * @param int $number The number to check
+        * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range.
+        * @return bool True if the number is inside the range; otherwise, false.
+        */
+       function isNumberIn( $number, $integerConstraint = true ) {
+               foreach ( $this->parts as $part ) {
+                       if ( is_array( $part ) ) {
+                               if ( ( !$integerConstraint || floor( $number ) === (float)$number )
+                                       && $number >= $part[0] && $number <= $part[1]
+                               ) {
+                                       return true;
+                               }
+                       } else {
+                               if ( $number == $part ) {
+                                       return true;
+                               }
+                       }
+               }
+               return false;
+       }
+
+       /**
+        * Readable alias for isNumberIn( $number, false ), and the implementation
+        * of the "within" operator.
+        *
+        * @param int $number The number to check
+        * @return bool True if the number is inside the range; otherwise, false.
+        */
+       function isNumberWithin( $number ) {
+               return $this->isNumberIn( $number, false );
+       }
+
+       /**
+        * Add another part to this range.
+        *
+        * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either
+        *   a range object itself or a single number.
+        */
+       function add( $other ) {
+               if ( $other instanceof self ) {
+                       $this->parts = array_merge( $this->parts, $other->parts );
+               } else {
+                       $this->parts[] = $other;
+               }
+       }
+
+       /**
+        * Returns the string representation of the rule evaluator range.
+        * The purpose of this method is to help debugging.
+        *
+        * @return string The string representation of the rule evaluator range
+        */
+       function __toString() {
+               $s = 'Range(';
+               foreach ( $this->parts as $i => $part ) {
+                       if ( $i ) {
+                               $s .= ', ';
+                       }
+                       if ( is_array( $part ) ) {
+                               $s .= $part[0] . '..' . $part[1];
+                       } else {
+                               $s .= $part;
+                       }
+               }
+               $s .= ')';
+               return $s;
+       }
+
+}