From: addshore Date: Tue, 8 Apr 2014 22:50:31 +0000 (+0100) Subject: Split CLDRPluralRuleEvaluator file X-Git-Tag: 1.31.0-rc.0~16307^2 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/banques/%7B%7B%20url_for%28%27admin_users%27%29%20%7D%7D?a=commitdiff_plain;h=fcd78a0bab281365368fdccf0e6c8f2e2a3c7ab7;p=lhc%2Fweb%2Fwiklou.git Split CLDRPluralRuleEvaluator file Change-Id: Iec9a939e3f28a600c841b89a2849f65d41763a62 --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 52b292fadf..4a4cd59ebb 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -1125,13 +1125,13 @@ $wgAutoloadLocalClasses = array( 'FakeConverter' => 'languages/Language.php', 'Language' => 'languages/Language.php', 'LanguageConverter' => 'languages/LanguageConverter.php', - 'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleEvaluator.php', - 'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleEvaluator.php', - 'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleEvaluator.php', - 'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleEvaluator.php', + 'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleConverter.php', + 'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleConverter_Expression.php', + 'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleConverter_Fragment.php', + 'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleConverter_Operator.php', 'CLDRPluralRuleEvaluator' => 'languages/utils/CLDRPluralRuleEvaluator.php', - 'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator.php', - 'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleEvaluator.php', + 'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator_Range.php', + 'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleError.php', # maintenance 'BackupDumper' => 'maintenance/backup.inc', diff --git a/languages/utils/CLDRPluralRuleConverter.php b/languages/utils/CLDRPluralRuleConverter.php new file mode 100644 index 0000000000..65d57e0d38 --- /dev/null +++ b/languages/utils/CLDRPluralRuleConverter.php @@ -0,0 +1,313 @@ + 2, + 'and' => 3, + 'is' => 4, + 'is-not' => 4, + 'in' => 4, + 'not-in' => 4, + 'within' => 4, + 'not-within' => 4, + 'mod' => 5, + ',' => 6, + '..' => 7, + ); + + /** + * A character list defining whitespace, for use in strspn() etc. + */ + const WHITESPACE_CLASS = " \t\r\n"; + + /** + * Same for digits. Note that the grammar given in UTS #35 doesn't allow + * negative numbers or decimal separators. + */ + const NUMBER_CLASS = '0123456789'; + + /** + * A character list of symbolic operands. + */ + const OPERAND_SYMBOLS = 'nivwft'; + + /** + * An anchored regular expression which matches a word at the current offset. + */ + const WORD_REGEX = '/[a-zA-Z@]+/A'; + + /** + * Convert a rule to RPN. This is the only public entry point. + * + * @param string $rule The rule to convert + * @return string The RPN representation of the rule + */ + public static function convert( $rule ) { + $parser = new self( $rule ); + return $parser->doConvert(); + } + + /** + * Private constructor. + */ + protected function __construct( $rule ) { + $this->rule = $rule; + $this->pos = 0; + $this->end = strlen( $rule ); + } + + /** + * Do the operation. + * + * @return string The RPN representation of the rule (e.g. "5 3 mod n is") + */ + protected function doConvert() { + $expectOperator = true; + + // Iterate through all tokens, saving the operators and operands to a + // stack per Dijkstra's shunting yard algorithm. + /** @var CLDRPluralRuleConverter_Operator $token */ + while ( false !== ( $token = $this->nextToken() ) ) { + // In this grammar, there are only binary operators, so every valid + // rule string will alternate between operator and operand tokens. + $expectOperator = !$expectOperator; + + if ( $token instanceof CLDRPluralRuleConverter_Expression ) { + // Operand + if ( $expectOperator ) { + $token->error( 'unexpected operand' ); + } + $this->operands[] = $token; + continue; + } else { + // Operator + if ( !$expectOperator ) { + $token->error( 'unexpected operator' ); + } + // Resolve higher precedence levels + $lastOp = end( $this->operators ); + while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { + $this->doOperation( $lastOp, $this->operands ); + array_pop( $this->operators ); + $lastOp = end( $this->operators ); + } + $this->operators[] = $token; + } + } + + // Finish off the stack + while ( $op = array_pop( $this->operators ) ) { + $this->doOperation( $op, $this->operands ); + } + + // Make sure the result is sane. The first case is possible for an empty + // string input, the second should be unreachable. + if ( !count( $this->operands ) ) { + $this->error( 'condition expected' ); + } elseif ( count( $this->operands ) > 1 ) { + $this->error( 'missing operator or too many operands' ); + } + + $value = $this->operands[0]; + if ( $value->type !== 'boolean' ) { + $this->error( 'the result must have a boolean type' ); + } + + return $this->operands[0]->rpn; + } + + /** + * Fetch the next token from the input string. + * + * @return CLDRPluralRuleConverter_Fragment The next token + */ + protected function nextToken() { + if ( $this->pos >= $this->end ) { + return false; + } + + // Whitespace + $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); + $this->pos += $length; + + if ( $this->pos >= $this->end ) { + return false; + } + + // Number + $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); + if ( $length !== 0 ) { + $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); + $this->pos += $length; + return $token; + } + + // Two-character operators + $op2 = substr( $this->rule, $this->pos, 2 ); + if ( $op2 === '..' || $op2 === '!=' ) { + $token = $this->newOperator( $op2, $this->pos, 2 ); + $this->pos += 2; + return $token; + } + + // Single-character operators + $op1 = $this->rule[$this->pos]; + if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) { + $token = $this->newOperator( $op1, $this->pos, 1 ); + $this->pos ++; + return $token; + } + + // Word + if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { + $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); + } + $word1 = strtolower( $m[0] ); + $word2 = ''; + $nextTokenPos = $this->pos + strlen( $word1 ); + if ( $word1 === 'not' || $word1 === 'is' ) { + // Look ahead one word + $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); + if ( $nextTokenPos < $this->end + && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) + ) { + $word2 = strtolower( $m[0] ); + $nextTokenPos += strlen( $word2 ); + } + } + + // Two-word operators like "is not" take precedence over single-word operators like "is" + if ( $word2 !== '' ) { + $bothWords = "{$word1}-{$word2}"; + if ( isset( self::$precedence[$bothWords] ) ) { + $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); + $this->pos = $nextTokenPos; + return $token; + } + } + + // Single-word operators + if ( isset( self::$precedence[$word1] ) ) { + $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); + $this->pos += strlen( $word1 ); + return $token; + } + + // The single-character operand symbols + if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) { + $token = $this->newNumber( $word1, $this->pos ); + $this->pos ++; + return $token; + } + + // Samples + if ( $word1 === '@integer' || $word1 === '@decimal' ) { + // Samples are like comments, they have no effect on rule evaluation. + // They run from the first sample indicator to the end of the string. + $this->pos = $this->end; + return false; + } + + $this->error( 'unrecognised word' ); + } + + /** + * For the binary operator $op, pop its operands off the stack and push + * a fragment with rpn and type members describing the result of that + * operation. + * + * @param CLDRPluralRuleConverter_Operator $op + */ + protected function doOperation( $op ) { + if ( count( $this->operands ) < 2 ) { + $op->error( 'missing operand' ); + } + $right = array_pop( $this->operands ); + $left = array_pop( $this->operands ); + $result = $op->operate( $left, $right ); + $this->operands[] = $result; + } + + /** + * Create a numerical expression object + * + * @param string $text + * @param int $pos + * @return CLDRPluralRuleConverter_Expression The numerical expression + */ + protected function newNumber( $text, $pos ) { + return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); + } + + /** + * Create a binary operator + * + * @param string $type + * @param int $pos + * @param int $length + * @return CLDRPluralRuleConverter_Operator The operator + */ + protected function newOperator( $type, $pos, $length ) { + return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); + } + + /** + * Throw an error + */ + protected function error( $message ) { + throw new CLDRPluralRuleError( $message ); + } +} diff --git a/languages/utils/CLDRPluralRuleConverter_Expression.php b/languages/utils/CLDRPluralRuleConverter_Expression.php new file mode 100644 index 0000000000..8352e725dc --- /dev/null +++ b/languages/utils/CLDRPluralRuleConverter_Expression.php @@ -0,0 +1,41 @@ +type = $type; + $this->rpn = $rpn; + } + + public function isType( $type ) { + if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { + return true; + } + if ( $type === $this->type ) { + return true; + } + return false; + } +} diff --git a/languages/utils/CLDRPluralRuleConverter_Fragment.php b/languages/utils/CLDRPluralRuleConverter_Fragment.php new file mode 100644 index 0000000000..88795a0bee --- /dev/null +++ b/languages/utils/CLDRPluralRuleConverter_Fragment.php @@ -0,0 +1,35 @@ +parser = $parser; + $this->pos = $pos; + $this->length = $length; + $this->end = $pos + $length; + } + + public function error( $message ) { + $text = $this->getText(); + throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); + } + + public function getText() { + return substr( $this->parser->rule, $this->pos, $this->length ); + } +} diff --git a/languages/utils/CLDRPluralRuleConverter_Operator.php b/languages/utils/CLDRPluralRuleConverter_Operator.php new file mode 100644 index 0000000000..c42953c70f --- /dev/null +++ b/languages/utils/CLDRPluralRuleConverter_Operator.php @@ -0,0 +1,114 @@ + 'bbb', + 'and' => 'bbb', + 'is' => 'nnb', + 'is-not' => 'nnb', + 'in' => 'nrb', + 'not-in' => 'nrb', + 'within' => 'nrb', + 'not-within' => 'nrb', + 'mod' => 'nnn', + ',' => 'rrr', + '..' => 'nnr', + ); + + /** + * Map converting from the abbrevation to the full form. + * + * @var array + */ + static $typeSpecMap = array( + 'b' => 'boolean', + 'n' => 'number', + 'r' => 'range', + ); + + /** + * Map for converting the new operators introduced in Rev 33 to the old forms + */ + static $aliasMap = array( + '%' => 'mod', + '!=' => 'not-in', + '=' => 'in' + ); + + /** + * Initialize a new instance of a CLDRPluralRuleConverter_Operator object + * + * @param CLDRPluralRuleConverter $parser The parser + * @param string $name The operator name + * @param int $pos The length + * @param int $length + */ + function __construct( $parser, $name, $pos, $length ) { + parent::__construct( $parser, $pos, $length ); + if ( isset( self::$aliasMap[$name] ) ) { + $name = self::$aliasMap[$name]; + } + $this->name = $name; + } + + /** + * Compute the operation + * + * @param CLDRPluralRuleConverter_Expression $left The left part of the expression + * @param CLDRPluralRuleConverter_Expression $right The right part of the expression + * @return CLDRPluralRuleConverter_Expression The result of the operation + */ + public function operate( $left, $right ) { + $typeSpec = self::$opTypes[$this->name]; + + $leftType = self::$typeSpecMap[$typeSpec[0]]; + $rightType = self::$typeSpecMap[$typeSpec[1]]; + $resultType = self::$typeSpecMap[$typeSpec[2]]; + + $start = min( $this->pos, $left->pos, $right->pos ); + $end = max( $this->end, $left->end, $right->end ); + $length = $end - $start; + + $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, + "{$left->rpn} {$right->rpn} {$this->name}", + $start, $length ); + + if ( !$left->isType( $leftType ) ) { + $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); + } + + if ( !$right->isType( $rightType ) ) { + $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); + } + return $newExpr; + } +} diff --git a/languages/utils/CLDRPluralRuleError.php b/languages/utils/CLDRPluralRuleError.php new file mode 100644 index 0000000000..2ca3410dd2 --- /dev/null +++ b/languages/utils/CLDRPluralRuleError.php @@ -0,0 +1,21 @@ +parts[] = $start; - } else { - $this->parts[] = array( $start, $end ); - } - } - - /** - * Determine if the given number is inside the range. - * - * @param int $number The number to check - * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range. - * @return bool True if the number is inside the range; otherwise, false. - */ - function isNumberIn( $number, $integerConstraint = true ) { - foreach ( $this->parts as $part ) { - if ( is_array( $part ) ) { - if ( ( !$integerConstraint || floor( $number ) === (float)$number ) - && $number >= $part[0] && $number <= $part[1] - ) { - return true; - } - } else { - if ( $number == $part ) { - return true; - } - } - } - return false; - } - - /** - * Readable alias for isNumberIn( $number, false ), and the implementation - * of the "within" operator. - * - * @param int $number The number to check - * @return bool True if the number is inside the range; otherwise, false. - */ - function isNumberWithin( $number ) { - return $this->isNumberIn( $number, false ); - } - - /** - * Add another part to this range. - * - * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either - * a range object itself or a single number. - */ - function add( $other ) { - if ( $other instanceof self ) { - $this->parts = array_merge( $this->parts, $other->parts ); - } else { - $this->parts[] = $other; - } - } - - /** - * Returns the string representation of the rule evaluator range. - * The purpose of this method is to help debugging. - * - * @return string The string representation of the rule evaluator range - */ - function __toString() { - $s = 'Range('; - foreach ( $this->parts as $i => $part ) { - if ( $i ) { - $s .= ', '; - } - if ( is_array( $part ) ) { - $s .= $part[0] . '..' . $part[1]; - } else { - $s .= $part; - } - } - $s .= ')'; - return $s; - } - -} - -/** - * Helper class for converting rules to reverse polish notation (RPN). - */ -class CLDRPluralRuleConverter { - /** - * The input string - * - * @var string - */ - public $rule; - - /** - * The current position - * - * @var int - */ - public $pos; - - /** - * The past-the-end position - * - * @var int - */ - public $end; - - /** - * The operator stack - * - * @var array - */ - public $operators = array(); - - /** - * The operand stack - * - * @var array - */ - public $operands = array(); - - /** - * Precedence levels. Note that there's no need to worry about associativity - * for the level 4 operators, since they return boolean and don't accept - * boolean inputs. - */ - static $precedence = array( - 'or' => 2, - 'and' => 3, - 'is' => 4, - 'is-not' => 4, - 'in' => 4, - 'not-in' => 4, - 'within' => 4, - 'not-within' => 4, - 'mod' => 5, - ',' => 6, - '..' => 7, - ); - - /** - * A character list defining whitespace, for use in strspn() etc. - */ - const WHITESPACE_CLASS = " \t\r\n"; - - /** - * Same for digits. Note that the grammar given in UTS #35 doesn't allow - * negative numbers or decimal separators. - */ - const NUMBER_CLASS = '0123456789'; - - /** - * A character list of symbolic operands. - */ - const OPERAND_SYMBOLS = 'nivwft'; - - /** - * An anchored regular expression which matches a word at the current offset. - */ - const WORD_REGEX = '/[a-zA-Z@]+/A'; - - /** - * Convert a rule to RPN. This is the only public entry point. - * - * @param string $rule The rule to convert - * @return string The RPN representation of the rule - */ - public static function convert( $rule ) { - $parser = new self( $rule ); - return $parser->doConvert(); - } - - /** - * Private constructor. - */ - protected function __construct( $rule ) { - $this->rule = $rule; - $this->pos = 0; - $this->end = strlen( $rule ); - } - - /** - * Do the operation. - * - * @return string The RPN representation of the rule (e.g. "5 3 mod n is") - */ - protected function doConvert() { - $expectOperator = true; - - // Iterate through all tokens, saving the operators and operands to a - // stack per Dijkstra's shunting yard algorithm. - /** @var CLDRPluralRuleConverter_Operator $token */ - while ( false !== ( $token = $this->nextToken() ) ) { - // In this grammar, there are only binary operators, so every valid - // rule string will alternate between operator and operand tokens. - $expectOperator = !$expectOperator; - - if ( $token instanceof CLDRPluralRuleConverter_Expression ) { - // Operand - if ( $expectOperator ) { - $token->error( 'unexpected operand' ); - } - $this->operands[] = $token; - continue; - } else { - // Operator - if ( !$expectOperator ) { - $token->error( 'unexpected operator' ); - } - // Resolve higher precedence levels - $lastOp = end( $this->operators ); - while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { - $this->doOperation( $lastOp, $this->operands ); - array_pop( $this->operators ); - $lastOp = end( $this->operators ); - } - $this->operators[] = $token; - } - } - - // Finish off the stack - while ( $op = array_pop( $this->operators ) ) { - $this->doOperation( $op, $this->operands ); - } - - // Make sure the result is sane. The first case is possible for an empty - // string input, the second should be unreachable. - if ( !count( $this->operands ) ) { - $this->error( 'condition expected' ); - } elseif ( count( $this->operands ) > 1 ) { - $this->error( 'missing operator or too many operands' ); - } - - $value = $this->operands[0]; - if ( $value->type !== 'boolean' ) { - $this->error( 'the result must have a boolean type' ); - } - - return $this->operands[0]->rpn; - } - - /** - * Fetch the next token from the input string. - * - * @return CLDRPluralRuleConverter_Fragment The next token - */ - protected function nextToken() { - if ( $this->pos >= $this->end ) { - return false; - } - - // Whitespace - $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); - $this->pos += $length; - - if ( $this->pos >= $this->end ) { - return false; - } - - // Number - $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); - if ( $length !== 0 ) { - $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); - $this->pos += $length; - return $token; - } - - // Two-character operators - $op2 = substr( $this->rule, $this->pos, 2 ); - if ( $op2 === '..' || $op2 === '!=' ) { - $token = $this->newOperator( $op2, $this->pos, 2 ); - $this->pos += 2; - return $token; - } - - // Single-character operators - $op1 = $this->rule[$this->pos]; - if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) { - $token = $this->newOperator( $op1, $this->pos, 1 ); - $this->pos ++; - return $token; - } - - // Word - if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { - $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); - } - $word1 = strtolower( $m[0] ); - $word2 = ''; - $nextTokenPos = $this->pos + strlen( $word1 ); - if ( $word1 === 'not' || $word1 === 'is' ) { - // Look ahead one word - $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); - if ( $nextTokenPos < $this->end - && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) - ) { - $word2 = strtolower( $m[0] ); - $nextTokenPos += strlen( $word2 ); - } - } - - // Two-word operators like "is not" take precedence over single-word operators like "is" - if ( $word2 !== '' ) { - $bothWords = "{$word1}-{$word2}"; - if ( isset( self::$precedence[$bothWords] ) ) { - $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); - $this->pos = $nextTokenPos; - return $token; - } - } - - // Single-word operators - if ( isset( self::$precedence[$word1] ) ) { - $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); - $this->pos += strlen( $word1 ); - return $token; - } - - // The single-character operand symbols - if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) { - $token = $this->newNumber( $word1, $this->pos ); - $this->pos ++; - return $token; - } - - // Samples - if ( $word1 === '@integer' || $word1 === '@decimal' ) { - // Samples are like comments, they have no effect on rule evaluation. - // They run from the first sample indicator to the end of the string. - $this->pos = $this->end; - return false; - } - - $this->error( 'unrecognised word' ); - } - - /** - * For the binary operator $op, pop its operands off the stack and push - * a fragment with rpn and type members describing the result of that - * operation. - * - * @param CLDRPluralRuleConverter_Operator $op - */ - protected function doOperation( $op ) { - if ( count( $this->operands ) < 2 ) { - $op->error( 'missing operand' ); - } - $right = array_pop( $this->operands ); - $left = array_pop( $this->operands ); - $result = $op->operate( $left, $right ); - $this->operands[] = $result; - } - - /** - * Create a numerical expression object - * - * @param string $text - * @param int $pos - * @return CLDRPluralRuleConverter_Expression The numerical expression - */ - protected function newNumber( $text, $pos ) { - return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); - } - - /** - * Create a binary operator - * - * @param string $type - * @param int $pos - * @param int $length - * @return CLDRPluralRuleConverter_Operator The operator - */ - protected function newOperator( $type, $pos, $length ) { - return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); - } - - /** - * Throw an error - */ - protected function error( $message ) { - throw new CLDRPluralRuleError( $message ); - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * The base class for operators and expressions, describing a region of the input string. - */ -class CLDRPluralRuleConverter_Fragment { - public $parser, $pos, $length, $end; - - function __construct( $parser, $pos, $length ) { - $this->parser = $parser; - $this->pos = $pos; - $this->length = $length; - $this->end = $pos + $length; - } - - public function error( $message ) { - $text = $this->getText(); - throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); - } - - public function getText() { - return substr( $this->parser->rule, $this->pos, $this->length ); - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * An expression object, representing a region of the input string (for error - * messages), the RPN notation used to evaluate it, and the result type for - * validation. - */ -class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { - /** @var string */ - public $type; - - /** @var string */ - public $rpn; - - function __construct( $parser, $type, $rpn, $pos, $length ) { - parent::__construct( $parser, $pos, $length ); - $this->type = $type; - $this->rpn = $rpn; - } - - public function isType( $type ) { - if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { - return true; - } - if ( $type === $this->type ) { - return true; - } - return false; - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * An operator object, representing a region of the input string (for error - * messages), and the binary operator at that location. - */ -class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { - /** @var string The name */ - public $name; - - /** - * Each op type has three characters: left operand type, right operand type and result type - * - * b = boolean - * n = number - * r = range - * - * A number is a kind of range. - * - * @var array - */ - static $opTypes = array( - 'or' => 'bbb', - 'and' => 'bbb', - 'is' => 'nnb', - 'is-not' => 'nnb', - 'in' => 'nrb', - 'not-in' => 'nrb', - 'within' => 'nrb', - 'not-within' => 'nrb', - 'mod' => 'nnn', - ',' => 'rrr', - '..' => 'nnr', - ); - - /** - * Map converting from the abbrevation to the full form. - * - * @var array - */ - static $typeSpecMap = array( - 'b' => 'boolean', - 'n' => 'number', - 'r' => 'range', - ); - - /** - * Map for converting the new operators introduced in Rev 33 to the old forms - */ - static $aliasMap = array( - '%' => 'mod', - '!=' => 'not-in', - '=' => 'in' - ); - - /** - * Initialize a new instance of a CLDRPluralRuleConverter_Operator object - * - * @param CLDRPluralRuleConverter $parser The parser - * @param string $name The operator name - * @param int $pos The length - * @param int $length - */ - function __construct( $parser, $name, $pos, $length ) { - parent::__construct( $parser, $pos, $length ); - if ( isset( self::$aliasMap[$name] ) ) { - $name = self::$aliasMap[$name]; - } - $this->name = $name; - } - - /** - * Compute the operation - * - * @param CLDRPluralRuleConverter_Expression $left The left part of the expression - * @param CLDRPluralRuleConverter_Expression $right The right part of the expression - * @return CLDRPluralRuleConverter_Expression The result of the operation - */ - public function operate( $left, $right ) { - $typeSpec = self::$opTypes[$this->name]; - - $leftType = self::$typeSpecMap[$typeSpec[0]]; - $rightType = self::$typeSpecMap[$typeSpec[1]]; - $resultType = self::$typeSpecMap[$typeSpec[2]]; - - $start = min( $this->pos, $left->pos, $right->pos ); - $end = max( $this->end, $left->end, $right->end ); - $length = $end - $start; - - $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, - "{$left->rpn} {$right->rpn} {$this->name}", - $start, $length ); - - if ( !$left->isType( $leftType ) ) { - $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); - } - - if ( !$right->isType( $rightType ) ) { - $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); - } - return $newExpr; - } -} - -/** - * The exception class for all the classes in this file. This will be thrown - * back to the caller if there is any validation error. - */ -class CLDRPluralRuleError extends MWException { - function __construct( $message ) { - parent::__construct( 'CLDR plural rule error: ' . $message ); - } -} diff --git a/languages/utils/CLDRPluralRuleEvaluator_Range.php b/languages/utils/CLDRPluralRuleEvaluator_Range.php new file mode 100644 index 0000000000..9732b8da81 --- /dev/null +++ b/languages/utils/CLDRPluralRuleEvaluator_Range.php @@ -0,0 +1,108 @@ +parts[] = $start; + } else { + $this->parts[] = array( $start, $end ); + } + } + + /** + * Determine if the given number is inside the range. + * + * @param int $number The number to check + * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range. + * @return bool True if the number is inside the range; otherwise, false. + */ + function isNumberIn( $number, $integerConstraint = true ) { + foreach ( $this->parts as $part ) { + if ( is_array( $part ) ) { + if ( ( !$integerConstraint || floor( $number ) === (float)$number ) + && $number >= $part[0] && $number <= $part[1] + ) { + return true; + } + } else { + if ( $number == $part ) { + return true; + } + } + } + return false; + } + + /** + * Readable alias for isNumberIn( $number, false ), and the implementation + * of the "within" operator. + * + * @param int $number The number to check + * @return bool True if the number is inside the range; otherwise, false. + */ + function isNumberWithin( $number ) { + return $this->isNumberIn( $number, false ); + } + + /** + * Add another part to this range. + * + * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either + * a range object itself or a single number. + */ + function add( $other ) { + if ( $other instanceof self ) { + $this->parts = array_merge( $this->parts, $other->parts ); + } else { + $this->parts[] = $other; + } + } + + /** + * Returns the string representation of the rule evaluator range. + * The purpose of this method is to help debugging. + * + * @return string The string representation of the rule evaluator range + */ + function __toString() { + $s = 'Range('; + foreach ( $this->parts as $i => $part ) { + if ( $i ) { + $s .= ', '; + } + if ( is_array( $part ) ) { + $s .= $part[0] . '..' . $part[1]; + } else { + $s .= $part; + } + } + $s .= ')'; + return $s; + } + +}