3 * Parse and evaluate a plural rule.
5 * http://unicode.org/reports/tr35/#Language_Plural_Rules
7 * @author Niklas Laxstrom, Tim Starling
9 * @copyright Copyright © 2010-2012, Niklas Laxström
10 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
15 class CLDRPluralRuleEvaluator
{
17 * Evaluate a number against a set of plural rules. If a rule passes,
18 * return the index of plural rule.
20 * @param int $number The number to be evaluated against the rules
21 * @param array $rules The associative array of plural rules in pluralform => rule format.
22 * @return int The index of the plural form which passed the evaluation
24 public static function evaluate( $number, array $rules ) {
25 $rules = self
::compile( $rules );
26 return self
::evaluateCompiled( $number, $rules );
30 * Convert a set of rules to a compiled form which is optimised for
31 * fast evaluation. The result will be an array of strings, and may be cached.
33 * @param array $rules The rules to compile
34 * @return array An array of compiled rules.
36 public static function compile( array $rules ) {
37 // We can't use array_map() for this because it generates a warning if
38 // there is an exception.
39 foreach ( $rules as &$rule ) {
40 $rule = CLDRPluralRuleConverter
::convert( $rule );
46 * Evaluate a compiled set of rules returned by compile(). Do not allow
47 * the user to edit the compiled form, or else PHP errors may result.
49 public static function evaluateCompiled( $number, array $rules ) {
50 // The compiled form is RPN, with tokens strictly delimited by
51 // spaces, so this is a simple RPN evaluator.
52 foreach ( $rules as $i => $rule ) {
56 foreach ( StringUtils
::explode( ' ', $rule ) as $token ) {
58 if ( $token === 'n' ) {
60 } elseif ( $ord >= $zero && $ord <= $nine ) {
61 $stack[] = intval( $token );
63 $right = array_pop( $stack );
64 $left = array_pop( $stack );
65 $result = self
::doOperation( $token, $left, $right );
73 // None of the provided rules match. The number belongs to category
74 // 'other' which comes last.
75 return count( $rules );
79 * Do a single operation
81 * @param string $token The token string
82 * @param $left The left operand. If it is an object, its state may be destroyed.
83 * @param $right The right operand
86 private static function doOperation( $token, $left, $right ) {
87 if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) {
88 if ( !($right instanceof CLDRPluralRuleEvaluator_Range
) ) {
89 $right = new CLDRPluralRuleEvaluator_Range( $right );
94 return $left ||
$right;
96 return $left && $right;
98 return $left == $right;
100 return $left != $right;
102 return $right->isNumberIn( $left );
104 return !$right->isNumberIn( $left );
106 return $right->isNumberWithin( $left );
108 return !$right->isNumberWithin( $left );
110 if ( is_int( $left ) ) {
111 return (int) fmod( $left, $right );
113 return fmod( $left, $right );
115 if ( $left instanceof CLDRPluralRuleEvaluator_Range
) {
118 $range = new CLDRPluralRuleEvaluator_Range( $left );
120 $range->add( $right );
123 return new CLDRPluralRuleEvaluator_Range( $left, $right );
125 throw new CLDRPluralRuleError( "Invalid RPN token" );
131 * Evaluator helper class representing a range list.
133 class CLDRPluralRuleEvaluator_Range
{
134 var $parts = array();
136 function __construct( $start, $end = false ) {
137 if ( $end === false ) {
138 $this->parts
[] = $start;
140 $this->parts
[] = array( $start, $end );
145 * Determine if the given number is inside the range. If $integerConstraint
146 * is true, the number must additionally be an integer if it is to match
149 function isNumberIn( $number, $integerConstraint = true ) {
150 foreach ( $this->parts
as $part ) {
151 if ( is_array( $part ) ) {
152 if ( ( !$integerConstraint ||
floor( $number ) === (float)$number )
153 && $number >= $part[0] && $number <= $part[1] )
158 if ( $number == $part ) {
167 * Readable alias for isNumberIn( $number, false ), and the implementation
168 * of the "within" operator.
170 function isNumberWithin( $number ) {
171 return $this->isNumberIn( $number, false );
175 * Add another part to this range. The supplied new part may either be a
176 * range object itself, or a single number.
178 function add( $other ) {
179 if ( $other instanceof self
) {
180 $this->parts
= array_merge( $this->parts
, $other->parts
);
182 $this->parts
[] = $other;
189 function __toString() {
191 foreach ( $this->parts
as $i => $part ) {
195 if ( is_array( $part ) ) {
196 $s .= $part[0] . '..' . $part[1];
208 * Helper class for converting rules to reverse polish notation (RPN).
210 class CLDRPluralRuleConverter
{
211 var $rule, $pos, $end;
212 var $operators = array();
213 var $operands = array();
216 * Precedence levels. Note that there's no need to worry about associativity
217 * for the level 4 operators, since they return boolean and don't accept
220 static $precedence = array(
235 * A character list defining whitespace, for use in strspn() etc.
237 const WHITESPACE_CLASS
= " \t\r\n";
240 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
241 * negative numbers or decimals.
243 const NUMBER_CLASS
= '0123456789';
246 * An anchored regular expression which matches a word at the current offset.
248 const WORD_REGEX
= '/[a-zA-Z]+/A';
251 * Convert a rule to RPN. This is the only public entry point.
253 public static function convert( $rule ) {
254 $parser = new self( $rule );
255 return $parser->doConvert();
259 * Private constructor.
261 protected function __construct( $rule ) {
264 $this->end
= strlen( $rule );
270 protected function doConvert() {
271 $expectOperator = true;
273 // Iterate through all tokens, saving the operators and operands to a
274 // stack per Dijkstra's shunting yard algorithm.
275 while ( false !== ( $token = $this->nextToken() ) ) {
276 // In this grammar, there are only binary operators, so every valid
277 // rule string will alternate between operator and operand tokens.
278 $expectOperator = !$expectOperator;
280 if ( $token instanceof CLDRPluralRuleConverter_Expression
) {
282 if ( $expectOperator ) {
283 $token->error( 'unexpected operand' );
285 $this->operands
[] = $token;
289 if ( !$expectOperator ) {
290 $token->error( 'unexpected operator' );
292 // Resolve higher precedence levels
293 $lastOp = end( $this->operators
);
294 while ( $lastOp && self
::$precedence[$token->name
] <= self
::$precedence[$lastOp->name
] ) {
295 $this->doOperation( $lastOp, $this->operands
);
296 array_pop( $this->operators
);
297 $lastOp = end( $this->operators
);
299 $this->operators
[] = $token;
303 // Finish off the stack
304 while ( $op = array_pop( $this->operators
) ) {
305 $this->doOperation( $op, $this->operands
);
308 // Make sure the result is sane. The first case is possible for an empty
309 // string input, the second should be unreachable.
310 if ( !count( $this->operands
) ) {
311 $this->error( 'condition expected' );
312 } elseif ( count( $this->operands
) > 1 ) {
313 $this->error( 'missing operator or too many operands' );
316 $value = $this->operands
[0];
317 if ( $value->type
!== 'boolean' ) {
318 $this->error( 'the result must have a boolean type' );
321 return $this->operands
[0]->rpn
;
325 * Fetch the next token from the input string. Return it as a
326 * CLDRPluralRuleConverter_Fragment object, or false once the input is exhausted.
328 protected function nextToken() {
329 if ( $this->pos
>= $this->end
) {
334 $length = strspn( $this->rule
, self
::WHITESPACE_CLASS
, $this->pos
);
335 $this->pos +
= $length;
337 if ( $this->pos
>= $this->end
) {
342 $length = strspn( $this->rule
, self
::NUMBER_CLASS
, $this->pos
);
343 if ( $length !== 0 ) {
344 $token = $this->newNumber( substr( $this->rule
, $this->pos
, $length ), $this->pos
);
345 $this->pos +
= $length;
350 if ( $this->rule
[$this->pos
] === ',' ) {
351 $token = $this->newOperator( ',', $this->pos
, 1 );
357 if ( substr( $this->rule
, $this->pos
, 2 ) === '..' ) {
358 $token = $this->newOperator( '..', $this->pos
, 2 );
364 if ( !preg_match( self
::WORD_REGEX
, $this->rule
, $m, 0, $this->pos
) ) {
365 $this->error( 'unexpected character "' . $this->rule
[$this->pos
] . '"' );
367 $word1 = strtolower( $m[0] );
369 $nextTokenPos = $this->pos +
strlen( $word1 );
370 if ( $word1 === 'not' ||
$word1 === 'is' ) {
371 // Look ahead one word
372 $nextTokenPos +
= strspn( $this->rule
, self
::WHITESPACE_CLASS
, $nextTokenPos );
373 if ( $nextTokenPos < $this->end
374 && preg_match( self
::WORD_REGEX
, $this->rule
, $m, 0, $nextTokenPos ) )
376 $word2 = strtolower( $m[0] );
377 $nextTokenPos +
= strlen( $word2 );
381 // Two-word operators like "is not" take precedence over single-word operators like "is"
382 if ( $word2 !== '' ) {
383 $bothWords = "{$word1}-{$word2}";
384 if ( isset( self
::$precedence[$bothWords] ) ) {
385 $token = $this->newOperator( $bothWords, $this->pos
, $nextTokenPos - $this->pos
);
386 $this->pos
= $nextTokenPos;
391 // Single-word operators
392 if ( isset( self
::$precedence[$word1] ) ) {
393 $token = $this->newOperator( $word1, $this->pos
, strlen( $word1 ) );
394 $this->pos +
= strlen( $word1 );
398 // The special numerical keyword "n"
399 if ( $word1 === 'n' ) {
400 $token = $this->newNumber( 'n', $this->pos
);
405 $this->error( 'unrecognised word' );
409 * For the binary operator $op, pop its operands off the stack and push
410 * a fragment with rpn and type members describing the result of that
413 protected function doOperation( $op ) {
414 if ( count( $this->operands
) < 2 ) {
415 $op->error( 'missing operand' );
417 $right = array_pop( $this->operands
);
418 $left = array_pop( $this->operands
);
419 $result = $op->operate( $left, $right );
420 $this->operands
[] = $result;
424 * Create a numerical expression object
426 protected function newNumber( $text, $pos ) {
427 return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
431 * Create a binary operator
433 protected function newOperator( $type, $pos, $length ) {
434 return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
440 protected function error( $message ) {
441 throw new CLDRPluralRuleError( $message );
446 * Helper for CLDRPluralRuleConverter.
447 * The base class for operators and expressions, describing a region of the input string.
449 class CLDRPluralRuleConverter_Fragment
{
450 var $parser, $pos, $length, $end;
452 function __construct( $parser, $pos, $length ) {
453 $this->parser
= $parser;
455 $this->length
= $length;
456 $this->end
= $pos +
$length;
459 public function error( $message ) {
460 $text = $this->getText();
461 throw new CLDRPluralRuleError( "$message at position " . ( $this->pos +
1 ) . ": \"$text\"" );
464 public function getText() {
465 return substr( $this->parser
->rule
, $this->pos
, $this->length
);
470 * Helper for CLDRPluralRuleConverter.
471 * An expression object, representing a region of the input string (for error
472 * messages), the RPN notation used to evaluate it, and the result type for
475 class CLDRPluralRuleConverter_Expression
extends CLDRPluralRuleConverter_Fragment
{
478 function __construct( $parser, $type, $rpn, $pos, $length ) {
479 parent
::__construct( $parser, $pos, $length );
484 public function isType( $type ) {
485 if ( $type === 'range' && ( $this->type
=== 'range' ||
$this->type
=== 'number' ) ) {
488 if ( $type === $this->type
) {
496 * Helper for CLDRPluralRuleConverter.
497 * An operator object, representing a region of the input string (for error
498 * messages), and the binary operator at that location.
500 class CLDRPluralRuleConverter_Operator
extends CLDRPluralRuleConverter_Fragment
{
504 * Each op type has three characters: left operand type, right operand type and result type
510 * A number is a kind of range.
512 static $opTypes = array(
520 'not-within' => 'nrb',
527 * Map converting from the abbreviation to the full form.
529 static $typeSpecMap = array(
535 function __construct( $parser, $name, $pos, $length ) {
536 parent
::__construct( $parser, $pos, $length );
540 public function operate( $left, $right ) {
541 $typeSpec = self
::$opTypes[$this->name
];
543 $leftType = self
::$typeSpecMap[$typeSpec[0]];
544 $rightType = self
::$typeSpecMap[$typeSpec[1]];
545 $resultType = self
::$typeSpecMap[$typeSpec[2]];
547 $start = min( $this->pos
, $left->pos
, $right->pos
);
548 $end = max( $this->end
, $left->end
, $right->end
);
549 $length = $end - $start;
551 $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser
, $resultType,
552 "{$left->rpn} {$right->rpn} {$this->name}",
555 if ( !$left->isType( $leftType ) ) {
556 $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
559 if ( !$right->isType( $rightType ) ) {
560 $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
567 * The exception class for all the classes in this file. This will be thrown
568 * back to the caller if there is any validation error.
570 class CLDRPluralRuleError
extends MWException
{
571 function __construct( $message ) {
572 parent
::__construct( 'CLDR plural rule error: ' . $message );