/**
* Parse and evaluate a plural rule.
*
- * http://unicode.org/reports/tr35/#Language_Plural_Rules
+ * UTS #35 Revision 33
+ * http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
*
* @author Niklas Laxstrom, Tim Starling
*
* Evaluate a compiled set of rules returned by compile(). Do not allow
* the user to edit the compiled form, or else PHP errors may result.
*
- * @param int The number to be evaluated against the rules
+ * @param string The number to be evaluated against the rules, in English, or it
+ * may be a type convertible to string.
* @param array The associative array of plural rules in pluralform => rule format.
* @return int The index of the plural form which passed the evaluation
*/
public static function evaluateCompiled( $number, array $rules ) {
+ // Calculate the values of the operand symbols
+ $number = strval( $number );
+ if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) {
+ wfDebug( __METHOD__.': invalid number input, returning "other"' );
+ return count( $rules );
+ }
+ if ( !isset( $m[3] ) ) {
+ $operandSymbols = array(
+ 'n' => intval( $m[1] ),
+ 'i' => intval( $m[1] ),
+ 'v' => 0,
+ 'w' => 0,
+ 'f' => 0,
+ 't' => 0
+ );
+ } else {
+ $absValStr = $m[1];
+ $intStr = $m[2];
+ $fracStr = $m[3];
+ $operandSymbols = array(
+ 'n' => floatval( $absValStr ),
+ 'i' => intval( $intStr ),
+ 'v' => strlen( $fracStr ),
+ 'w' => strlen( rtrim( $fracStr, '0' ) ),
+ 'f' => intval( $fracStr ),
+ 't' => intval( rtrim( $fracStr, '0' ) ),
+ );
+ }
+
// The compiled form is RPN, with tokens strictly delimited by
// spaces, so this is a simple RPN evaluator.
foreach ( $rules as $i => $rule ) {
$nine = ord( '9' );
foreach ( StringUtils::explode( ' ', $rule ) as $token ) {
$ord = ord( $token );
- if ( $token === 'n' ) {
- $stack[] = $number;
+ if ( isset( $operandSymbols[$token] ) ) {
+ $stack[] = $operandSymbols[$token];
} elseif ( $ord >= $zero && $ord <= $nine ) {
$stack[] = intval( $token );
} else {
return $i;
}
}
- // None of the provided rules match. The number belongs to caregory
- // 'other' which comes last.
+ // None of the provided rules match. The number belongs to category
+ // 'other', which comes last.
return count( $rules );
}
*/
class CLDRPluralRuleConverter {
/**
- * The rule
+ * The input string
*
* @var string
*/
public $rule;
/**
- * The position
+ * The current position
*
* @var int
*/
public $pos;
/**
- * The last position possible
+ * The past-the-end position
*
* @var int
*/
public $end;
/**
- * The operators
+ * The operator stack
*
* @var array
*/
public $operators = array();
/**
- * The operands
+ * The operand stack
*
* @var array
*/
/**
* Same for digits. Note that the grammar given in UTS #35 doesn't allow
- * negative numbers or decimals.
+ * negative numbers or decimal separators.
*/
const NUMBER_CLASS = '0123456789';
+ /**
+ * A character list of symbolic operands.
+ */
+ const OPERAND_SYMBOLS = 'nivwft';
+
/**
* An anchored regular expression which matches a word at the current offset.
*/
- const WORD_REGEX = '/[a-zA-Z]+/A';
+ const WORD_REGEX = '/[a-zA-Z@]+/A';
/**
* Convert a rule to RPN. This is the only public entry point.
return $token;
}
- // Comma
- if ( $this->rule[$this->pos] === ',' ) {
- $token = $this->newOperator( ',', $this->pos, 1 );
- $this->pos ++;
+ // Two-character operators
+ $op2 = substr( $this->rule, $this->pos, 2 );
+ if ( $op2 === '..' || $op2 === '!=' ) {
+ $token = $this->newOperator( $op2, $this->pos, 2 );
+ $this->pos += 2;
return $token;
}
- // Dot dot
- if ( substr( $this->rule, $this->pos, 2 ) === '..' ) {
- $token = $this->newOperator( '..', $this->pos, 2 );
- $this->pos += 2;
+ // Single-character operators
+ $op1 = $this->rule[$this->pos];
+ if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
+ $token = $this->newOperator( $op1, $this->pos, 1 );
+ $this->pos ++;
return $token;
}
return $token;
}
- // The special numerical keyword "n"
- if ( $word1 === 'n' ) {
- $token = $this->newNumber( 'n', $this->pos );
+ // The single-character operand symbols
+ if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
+ $token = $this->newNumber( $word1, $this->pos );
$this->pos ++;
return $token;
}
+ // Samples
+ if ( $word1 === '@integer' || $word1 === '@decimal' ) {
+ // Samples are like comments, they have no effect on rule evaluation.
+ // They run from the first sample indicator to the end of the string.
+ $this->pos = $this->end;
+ return false;
+ }
+
$this->error( 'unrecognised word' );
}
'r' => 'range',
);
+ /**
+ * Map for converting the new operators introduced in Rev 33 to the old forms
+ */
+ static $aliasMap = array(
+ '%' => 'mod',
+ '!=' => 'not-in',
+ '=' => 'in'
+ );
+
/**
* Initialize a new instance of a CLDRPluralRuleConverter_Operator object
*
*/
function __construct( $parser, $name, $pos, $length ) {
parent::__construct( $parser, $pos, $length );
+ if ( isset( self::$aliasMap[$name] ) ) {
+ $name = self::$aliasMap[$name];
+ }
$this->name = $name;
}
array( 0, 'n in 3..10,13..19', 13, 'scottish rule - ranges with comma' ),
array( 0, '5 mod 3 is n', 2, 'n as result of mod - no need to pass' ),
+
+ # Revision 33 new operand examples
+ # expected, rule, number, comment
+ array( 0, 'i is 1', '1.00', 'new operand i' ),
+ array( 0, 'v is 2', '1.00', 'new operand v' ),
+ array( 0, 'w is 0', '1.00', 'new operand w' ),
+ array( 0, 'f is 0', '1.00', 'new operand f' ),
+ array( 0, 't is 0', '1.00', 'new operand t' ),
+
+ array( 0, 'i is 1', '1.30', 'new operand i' ),
+ array( 0, 'v is 2', '1.30', 'new operand v' ),
+ array( 0, 'w is 1', '1.30', 'new operand w' ),
+ array( 0, 'f is 30', '1.30', 'new operand f' ),
+ array( 0, 't is 3', '1.30', 'new operand t' ),
+
+ array( 0, 'i is 1', '1.03', 'new operand i' ),
+ array( 0, 'v is 2', '1.03', 'new operand v' ),
+ array( 0, 'w is 2', '1.03', 'new operand w' ),
+ array( 0, 'f is 3', '1.03', 'new operand f' ),
+ array( 0, 't is 3', '1.03', 'new operand t' ),
+
+ # Revision 33 new operator aliases
+ # expected, rule, number, comment
+ array( 0, 'n % 3 is 1', 7, 'new % operator' ),
+ array( 0, 'n = 1,3,5', 3, 'new = operator' ),
+ array( 1, 'n != 1,3,5', 5, 'new != operator' ),
+
+ # Revision 33 samples
+ # expected, rule, number, comment
+ array( 0, 'n in 1,3,5@integer 3~10, 103~110, 1003, … @decimal 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 103.0, 1003.0, …', 3, 'samples' ),
+
+ # Revision 33 some test cases from CLDR
+ array( 0, 'i = 1 and v = 0 or i = 0 and t = 1', '0.1', 'pt one' ),
+ array( 0, 'i = 1 and v = 0 or i = 0 and t = 1', '0.01', 'pt one' ),
+ array( 0, 'i = 1 and v = 0 or i = 0 and t = 1', '0.10', 'pt one' ),
+ array( 0, 'i = 1 and v = 0 or i = 0 and t = 1', '0.010', 'pt one' ),
+ array( 0, 'i = 1 and v = 0 or i = 0 and t = 1', '0.100', 'pt one' ),
+ array( 1, 'i = 1 and v = 0 or i = 0 and t = 1', '0.0', 'pt other' ),
+ array( 1, 'i = 1 and v = 0 or i = 0 and t = 1', '0.2', 'pt other' ),
+ array( 1, 'i = 1 and v = 0 or i = 0 and t = 1', '10.0', 'pt other' ),
+ array( 1, 'i = 1 and v = 0 or i = 0 and t = 1', '100.0', 'pt other' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '2', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '4', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '22', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '102', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '0.2', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '0.4', 'bs few' ),
+ array( 0, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '10.2', 'bs few' ),
+ array( 1, 'v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14', '10.0', 'bs other' ),
+
);
return $tests;