From: Tim Starling Date: Thu, 16 Aug 2012 10:36:08 +0000 (+1000) Subject: CLDR plural parser in PHP X-Git-Tag: 1.31.0-rc.0~22571^2~1 X-Git-Url: http://git.cyclocoop.org/%24action?a=commitdiff_plain;h=fceb9bcb49fdeaaf978a1667ef382aedb6e715d1;p=lhc%2Fweb%2Fwiklou.git CLDR plural parser in PHP Wrote a CLDR plural rule parser to replace the eval()-based one from I58a9cdfe. It converts the infix notation of the XML files to a sanitized RPN notation, referred to in external interfaces as the "compiled" form. The RPN notation is cached and then executed by a fast non-validating evaluator. Timings for the largest rule in the XML file are ~1.2ms for compilation and ~200us for execution. Also: * Lazy-load the plural rules when recache() requests them, instead of loading them for every request. * Language::convertPlural() needs integer keys, and CLDR only gives string keys. The previous code was not mapping them so it didn't work at all. I just mapped them in the order they appear in the XML file, i.e. the first rule becomes MediaWiki's $pluralForm=0, the second becomes $pluralForm=1, etc. Not sure if there is a more rigorous way to do it. Change-Id: I65ee788c1a8e5ee2ede2091990d86eb722749dd3 --- diff --git a/includes/LocalisationCache.php b/includes/LocalisationCache.php index c9dd69754d..c1ac848490 100644 --- a/includes/LocalisationCache.php +++ b/includes/LocalisationCache.php @@ -154,10 +154,11 @@ class LocalisationCache { */ static public $preloadedKeys = array( 'dateFormats', 'namespaceNames' ); - /* - * Associative array containing plural rules. + /** + * Associative array of cached plural rules. The key is the language code, + * the value is an array of plural rules for that language. 
*/ - var $pluralRules = array(); + var $pluralRules = null; var $mergeableKeys = null; @@ -207,7 +208,6 @@ class LocalisationCache { $this->$var = $conf[$var]; } } - $this->readPluralRules(); } /** @@ -491,36 +491,62 @@ class LocalisationCache { } return $data; } + /** - * Read the plural rule xml files. - * First the CLDR xml will be read and it will be extended with - * mediawiki specific tailoring. + * Get the compiled plural rules for a given language from the XML files. * @since 1.20 */ - protected function readPluralRules() { - $CLDRPlural = __DIR__ . "/../languages/data/plurals.xml"; - $MWPlural = __DIR__ . "/../languages/data/plurals-mediawiki.xml"; - # Load CLDR plural rules - $this->parsePluralXML( $CLDRPlural ); - if ( file_exists( $MWPlural ) ) { - // override or extend. - $this->parsePluralXML( $MWPlural ); + public function getCompiledPluralRules( $code ) { + $rules = $this->getPluralRules( $code ); + try { + $compiledRules = CLDRPluralRuleEvaluator::compile( $rules ); + } catch( CLDRPluralRuleError $e ) { + wfDebugLog( 'l10n', $e->getMessage() . "\n" ); + return array(); } + return $compiledRules; } - private function parsePluralXML( $xmlFile ) { - $pluraldoc = new DOMDocument(); - $pluraldoc->load( $xmlFile ); - $rulesets = $pluraldoc->getElementsByTagName( "pluralRules" ); + /** + * Get the plural rules for a given language from the XML files. + * Cached. + * @since 1.20 + */ + public function getPluralRules( $code ) { + if ( $this->pluralRules === null ) { + $cldrPlural = __DIR__ . "/../languages/data/plurals.xml"; + $mwPlural = __DIR__ . 
"/../languages/data/plurals-mediawiki.xml"; + // Load CLDR plural rules + $this->loadPluralFile( $cldrPlural ); + if ( file_exists( $mwPlural ) ) { + // Override or extend + $this->loadPluralFile( $mwPlural ); + } + } + if ( !isset( $this->pluralRules[$code] ) ) { + return array(); + } else { + return $this->pluralRules[$code]; + } + } + + /** + * Load a plural XML file with the given filename, compile the relevant + * rules, and save the compiled rules in a process-local cache. + */ + private function loadPluralFile( $fileName ) { + $doc = new DOMDocument; + $doc->load( $fileName ); + $rulesets = $doc->getElementsByTagName( "pluralRules" ); foreach ( $rulesets as $ruleset ) { $codes = $ruleset->getAttribute( 'locales' ); - $parsedRules = array(); - $rules = $ruleset->getElementsByTagName( "pluralRule" ); - foreach ( $rules as $rule ) { - $parsedRules[$rule->getAttribute( 'count' )] = $rule->nodeValue; + $rules = array(); + $ruleElements = $ruleset->getElementsByTagName( "pluralRule" ); + foreach ( $ruleElements as $elt ) { + $rules[] = $elt->nodeValue; } foreach ( explode( ' ', $codes ) as $code ) { - $this->pluralRules[$code] = $parsedRules; + $this->pluralRules[$code] = $rules; } } } @@ -728,10 +754,10 @@ class LocalisationCache { foreach ( self::$splitKeys as $key ) { $allData['list'][$key] = array_keys( $allData[$key] ); } - # Load CLDR plural rules - if ( isset( $this->pluralRules[$code] ) ) { - $allData['pluralRules'] = $this->pluralRules[$code]; - } + # Load CLDR plural rules for JavaScript + $allData['pluralRules'] = $this->getPluralRules( $code ); + # And for PHP + $allData['compiledPluralRules'] = $this->getCompiledPluralRules( $code ); # Run hooks wfRunHooks( 'LocalisationCacheRecache', array( $this, $code, &$allData ) ); diff --git a/languages/Language.php b/languages/Language.php index e67c086518..d1a38bb575 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -4190,6 +4190,15 @@ class Language { return 
$this->mConverter->getConvRuleTitle(); } + /** + * Get the compiled plural rules for the language + * @since 1.20 + * @return array Associative array with plural form, and plural rule as key-value pairs + */ + public function getCompiledPluralRules() { + return self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' ); + } + /** * Get the plural rules for the language * @since 1.20 @@ -4205,8 +4214,8 @@ class Language { * @return int The index of the plural form */ private function getPluralForm( $number ) { - $pluralRules = $this->getPluralRules(); - $form = CLDRPluralRuleEvaluator::evaluate( $number, $pluralRules ); + $pluralRules = $this->getCompiledPluralRules(); + $form = CLDRPluralRuleEvaluator::evaluateCompiled( $number, $pluralRules ); return $form; } diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php index f420e41b58..6b11704376 100644 --- a/languages/utils/CLDRPluralRuleEvaluator.php +++ b/languages/utils/CLDRPluralRuleEvaluator.php @@ -1,12 +1,15 @@ rule format. * @return int The index of the plural form which passed the evaluation */ - public static function evaluate( $number, $rules ) { - $formIndex = 0; - if ( !$rules ) { - return 0; + public static function evaluate( $number, array $rules ) { + $rules = self::compile( $rules ); + return self::evaluateCompiled( $number, $rules ); + } + + /** + * Convert a set of rules to a compiled form which is optimised for + * fast evaluation. The result will be an array of strings, and may be cached. + * + * @param $rules The rules to compile + * @return An array of compiled rules. + */ + public static function compile( array $rules ) { + // We can't use array_map() for this because it generates a warning if + // there is an exception. + foreach ( $rules as &$rule ) { + $rule = CLDRPluralRuleConverter::convert( $rule ); } - foreach ( $rules as $form => $rule ) { - $parsedRule = self::parseCLDRRule( $rule, $number ); - // FIXME eval is bad. 
- if ( eval( "return $parsedRule;" ) ) { - return $formIndex; + return $rules; + } + + /** + * Evaluate a compiled set of rules returned by compile(). Do not allow + * the user to edit the compiled form, or else PHP errors may result. + */ + public static function evaluateCompiled( $number, array $rules ) { + // The compiled form is RPN, with tokens strictly delimited by + // spaces, so this is a simple RPN evaluator. + foreach ( $rules as $i => $rule ) { + $stack = array(); + $zero = ord( '0' ); + $nine = ord( '9' ); + foreach ( StringUtils::explode( ' ', $rule ) as $token ) { + $ord = ord( $token ); + if ( $token === 'n' ) { + $stack[] = $number; + } elseif ( $ord >= $zero && $ord <= $nine ) { + $stack[] = intval( $token ); + } else { + $right = array_pop( $stack ); + $left = array_pop( $stack ); + $result = self::doOperation( $token, $left, $right ); + $stack[] = $result; + } + } + if ( $stack[0] ) { + return $i; } - $formIndex++; } - return $formIndex; - } - private static function parseCLDRRule( $rule ) { - $rule = preg_replace( '/\bn\b/', '$number', $rule ); - $rule = preg_replace( '/([^ ]+) mod (\d+)/', 'self::mod(\1,\2)', $rule ); - $rule = preg_replace( '/([^ ]+) is not (\d+)/' , '\1!=\2', $rule ); - $rule = preg_replace( '/([^ ]+) is (\d+)/', '\1==\2', $rule ); - $rule = preg_replace( '/([^ ]+) not in (\d+)\.\.(\d+)/', '!self::in(\1,\2,\3)', $rule ); - $rule = preg_replace( '/([^ ]+) not within (\d+)\.\.(\d+)/', '!self::within(\1,\2,\3)', $rule ); - $rule = preg_replace( '/([^ ]+) in (\d+)\.\.(\d+)/', 'self::in(\1,\2,\3)', $rule ); - $rule = preg_replace( '/([^ ]+) within (\d+)\.\.(\d+)/', 'self::within(\1,\2,\3)', $rule ); - // AND takes precedence over OR - $andrule = '/([^ ]+) and ([^ ]+)/i'; - while ( preg_match( $andrule, $rule ) ) { - $rule = preg_replace( $andrule, '(\1&&\2)', $rule ); + // None of the provided rules match. The number belongs to category + // 'other' which comes last. 
+ return count( $rules ); + } + + /** + * Do a single operation + * + * @param $token string The token string + * @param $left The left operand. If it is an object, its state may be destroyed. + * @param $right The right operand + * @return mixed + */ + private static function doOperation( $token, $left, $right ) { + if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) { + if ( !($right instanceof CLDRPluralRuleEvaluator_Range ) ) { + $right = new CLDRPluralRuleEvaluator_Range( $right ); + } } - $orrule = '/([^ ]+) or ([^ ]+)/i'; - while ( preg_match( $orrule, $rule ) ) { - $rule = preg_replace( $orrule, '(\1||\2)', $rule ); + switch ( $token ) { + case 'or': + return $left || $right; + case 'and': + return $left && $right; + case 'is': + return $left == $right; + case 'is-not': + return $left != $right; + case 'in': + return $right->isNumberIn( $left ); + case 'not-in': + return !$right->isNumberIn( $left ); + case 'within': + return $right->isNumberWithin( $left ); + case 'not-within': + return !$right->isNumberWithin( $left ); + case 'mod': + if ( is_int( $left ) ) { + return (int) fmod( $left, $right ); + } + return fmod( $left, $right ); + case ',': + if ( $left instanceof CLDRPluralRuleEvaluator_Range ) { + $range = $left; + } else { + $range = new CLDRPluralRuleEvaluator_Range( $left ); + } + $range->add( $right ); + return $range; + case '..': + return new CLDRPluralRuleEvaluator_Range( $left, $right ); + default: + throw new CLDRPluralRuleError( "Invalid RPN token" ); } + } +} + +/** + * Evaluator helper class representing a range list. 
+ */ +class CLDRPluralRuleEvaluator_Range { + var $parts = array(); - return $rule; + function __construct( $start, $end = false ) { + if ( $end === false ) { + $this->parts[] = $start; + } else { + $this->parts[] = array( $start, $end ); + } } - private static function in( $num, $low, $high ) { - return is_int( $num ) && $num >= $low && $num <= $high; + /** + * Determine if the given number is inside the range. If $integerConstraint + * is true, the number must additionally be an integer if it is to match + * any interval part. + */ + function isNumberIn( $number, $integerConstraint = true ) { + foreach ( $this->parts as $part ) { + if ( is_array( $part ) ) { + if ( ( !$integerConstraint || floor( $number ) === (float)$number ) + && $number >= $part[0] && $number <= $part[1] ) + { + return true; + } + } else { + if ( $number == $part ) { + return true; + } + } + } + return false; } - private static function within( $num, $low, $high ) { - return $num >= $low && $num <= $high; + /** + * Readable alias for isNumberIn( $number, false ), and the implementation + * of the "within" operator. + */ + function isNumberWithin( $number ) { + return $this->isNumberIn( $number, false ); } - private static function mod( $num, $mod ) { - if ( is_int( $num ) ) { - return (int) fmod( $num, $mod ); + /** + * Add another part to this range. The supplied new part may either be a + * range object itself, or a single number. + */ + function add( $other ) { + if ( $other instanceof self ) { + $this->parts = array_merge( $this->parts, $other->parts ); + } else { + $this->parts[] = $other; } - return fmod( $num, $mod ); + } + + /** + * For debugging + */ + function __toString() { + $s = 'Range('; + foreach ( $this->parts as $i => $part ) { + if ( $i ) { + $s .= ', '; + } + if ( is_array( $part ) ) { + $s .= $part[0] . '..' . $part[1]; + } else { + $s .= $part; + } + } + $s .= ')'; + return $s; + } + +} + +/** + * Helper class for converting rules to reverse polish notation (RPN). 
+ */ +class CLDRPluralRuleConverter { + var $rule, $pos, $end; + var $operators = array(); + var $operands = array(); + + /** + * Precedence levels. Note that there's no need to worry about associativity + * for the level 4 operators, since they return boolean and don't accept + * boolean inputs. + */ + static $precedence = array( + 'or' => 2, + 'and' => 3, + 'is' => 4, + 'is-not' => 4, + 'in' => 4, + 'not-in' => 4, + 'within' => 4, + 'not-within' => 4, + 'mod' => 5, + ',' => 6, + '..' => 7, + ); + + /** + * A character list defining whitespace, for use in strspn() etc. + */ + const WHITESPACE_CLASS = " \t\r\n"; + + /** + * Same for digits. Note that the grammar given in UTS #35 doesn't allow + * negative numbers or decimals. + */ + const NUMBER_CLASS = '0123456789'; + + /** + * An anchored regular expression which matches a word at the current offset. + */ + const WORD_REGEX = '/[a-zA-Z]+/A'; + + /** + * Convert a rule to RPN. This is the only public entry point. + */ + public static function convert( $rule ) { + $parser = new self( $rule ); + return $parser->doConvert(); + } + + /** + * Private constructor. + */ + protected function __construct( $rule ) { + $this->rule = $rule; + $this->pos = 0; + $this->end = strlen( $rule ); + } + + /** + * Do the operation. + */ + protected function doConvert() { + $expectOperator = true; + + // Iterate through all tokens, saving the operators and operands to a + // stack per Dijkstra's shunting yard algorithm. + while ( false !== ( $token = $this->nextToken() ) ) { + // In this grammar, there are only binary operators, so every valid + // rule string will alternate between operator and operand tokens. 
+ $expectOperator = !$expectOperator; + + if ( $token instanceof CLDRPluralRuleConverter_Expression ) { + // Operand + if ( $expectOperator ) { + $token->error( 'unexpected operand' ); + } + $this->operands[] = $token; + continue; + } else { + // Operator + if ( !$expectOperator ) { + $token->error( 'unexpected operator' ); + } + // Resolve higher precedence levels + $lastOp = end( $this->operators ); + while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { + $this->doOperation( $lastOp, $this->operands ); + array_pop( $this->operators ); + $lastOp = end( $this->operators ); + } + $this->operators[] = $token; + } + } + + // Finish off the stack + while ( $op = array_pop( $this->operators ) ) { + $this->doOperation( $op, $this->operands ); + } + + // Make sure the result is sane. The first case is possible for an empty + // string input, the second should be unreachable. + if ( !count( $this->operands ) ) { + $this->error( 'condition expected' ); + } elseif ( count( $this->operands ) > 1 ) { + $this->error( 'missing operator or too many operands' ); + } + + $value = $this->operands[0]; + if ( $value->type !== 'boolean' ) { + $this->error( 'the result must have a boolean type' ); + } + + return $this->operands[0]->rpn; + } + + /** + * Fetch the next token from the input string. Return it as a + * CLDRPluralRuleConverter_Fragment object. 
+ */ + protected function nextToken() { + if ( $this->pos >= $this->end ) { + return false; + } + + // Whitespace + $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); + $this->pos += $length; + + if ( $this->pos >= $this->end ) { + return false; + } + + // Number + $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); + if ( $length !== 0 ) { + $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); + $this->pos += $length; + return $token; + } + + // Comma + if ( $this->rule[$this->pos] === ',' ) { + $token = $this->newOperator( ',', $this->pos, 1 ); + $this->pos ++; + return $token; + } + + // Dot dot + if ( substr( $this->rule, $this->pos, 2 ) === '..' ) { + $token = $this->newOperator( '..', $this->pos, 2 ); + $this->pos += 2; + return $token; + } + + // Word + if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { + $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); + } + $word1 = strtolower( $m[0] ); + $word2 = ''; + $nextTokenPos = $this->pos + strlen( $word1 ); + if ( $word1 === 'not' || $word1 === 'is' ) { + // Look ahead one word + $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); + if ( $nextTokenPos < $this->end + && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) ) + { + $word2 = strtolower( $m[0] ); + $nextTokenPos += strlen( $word2 ); + } + } + + // Two-word operators like "is not" take precedence over single-word operators like "is" + if ( $word2 !== '' ) { + $bothWords = "{$word1}-{$word2}"; + if ( isset( self::$precedence[$bothWords] ) ) { + $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); + $this->pos = $nextTokenPos; + return $token; + } + } + + // Single-word operators + if ( isset( self::$precedence[$word1] ) ) { + $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); + $this->pos += strlen( $word1 ); + return $token; + } + + // The special numerical 
keyword "n" + if ( $word1 === 'n' ) { + $token = $this->newNumber( 'n', $this->pos ); + $this->pos ++; + return $token; + } + + $this->error( 'unrecognised word' ); + } + + /** + * For the binary operator $op, pop its operands off the stack and push + * a fragment with rpn and type members describing the result of that + * operation. + */ + protected function doOperation( $op ) { + if ( count( $this->operands ) < 2 ) { + $op->error( 'missing operand' ); + } + $right = array_pop( $this->operands ); + $left = array_pop( $this->operands ); + $result = $op->operate( $left, $right ); + $this->operands[] = $result; + } + + /** + * Create a numerical expression object + */ + protected function newNumber( $text, $pos ) { + return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); + } + + /** + * Create a binary operator + */ + protected function newOperator( $type, $pos, $length ) { + return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); + } + + /** + * Throw an error + */ + protected function error( $message ) { + throw new CLDRPluralRuleError( $message ); + } +} + +/** + * Helper for CLDRPluralRuleConverter. + * The base class for operators and expressions, describing a region of the input string. + */ +class CLDRPluralRuleConverter_Fragment { + var $parser, $pos, $length, $end; + + function __construct( $parser, $pos, $length ) { + $this->parser = $parser; + $this->pos = $pos; + $this->length = $length; + $this->end = $pos + $length; + } + + public function error( $message ) { + $text = $this->getText(); + throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); + } + + public function getText() { + return substr( $this->parser->rule, $this->pos, $this->length ); + } +} + +/** + * Helper for CLDRPluralRuleConverter. 
+ * An expression object, representing a region of the input string (for error + * messages), the RPN notation used to evaluate it, and the result type for + * validation. + */ +class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { + var $type, $rpn; + + function __construct( $parser, $type, $rpn, $pos, $length ) { + parent::__construct( $parser, $pos, $length ); + $this->type = $type; + $this->rpn = $rpn; + } + + public function isType( $type ) { + if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { + return true; + } + if ( $type === $this->type ) { + return true; + } + return false; + } +} + +/** + * Helper for CLDRPluralRuleConverter. + * An operator object, representing a region of the input string (for error + * messages), and the binary operator at that location. + */ +class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { + var $name; + + /** + * Each op type has three characters: left operand type, right operand type and result type + * + * b = boolean + * n = number + * r = range + * + * A number is a kind of range. + */ + static $opTypes = array( + 'or' => 'bbb', + 'and' => 'bbb', + 'is' => 'nnb', + 'is-not' => 'nnb', + 'in' => 'nrb', + 'not-in' => 'nrb', + 'within' => 'nrb', + 'not-within' => 'nrb', + 'mod' => 'nnn', + ',' => 'rrr', + '..' => 'nnr', + ); + + /** + * Map converting from the abbreviation to the full form. 
+ */ + static $typeSpecMap = array( + 'b' => 'boolean', + 'n' => 'number', + 'r' => 'range', + ); + + function __construct( $parser, $name, $pos, $length ) { + parent::__construct( $parser, $pos, $length ); + $this->name = $name; + } + + public function operate( $left, $right ) { + $typeSpec = self::$opTypes[$this->name]; + + $leftType = self::$typeSpecMap[$typeSpec[0]]; + $rightType = self::$typeSpecMap[$typeSpec[1]]; + $resultType = self::$typeSpecMap[$typeSpec[2]]; + + $start = min( $this->pos, $left->pos, $right->pos ); + $end = max( $this->end, $left->end, $right->end ); + $length = $end - $start; + + $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, + "{$left->rpn} {$right->rpn} {$this->name}", + $start, $length ); + + if ( !$left->isType( $leftType ) ) { + $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); + } + + if ( !$right->isType( $rightType ) ) { + $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); + } + return $newExpr; + } +} + +/** + * The exception class for all the classes in this file. This will be thrown + * back to the caller if there is any validation error. + */ +class CLDRPluralRuleError extends MWException { + function __construct( $message ) { + parent::__construct( 'CLDR plural rule error: ' . 
$message ); } } diff --git a/tests/phpunit/languages/utils/CLDRPluralRuleEvaluatorTest.php b/tests/phpunit/languages/utils/CLDRPluralRuleEvaluatorTest.php new file mode 100644 index 0000000000..033164b026 --- /dev/null +++ b/tests/phpunit/languages/utils/CLDRPluralRuleEvaluatorTest.php @@ -0,0 +1,95 @@ +assertEquals( $expected, $result, $comment ); + } + + /** + * @dataProvider invalidTestCases + * @expectedException CLDRPluralRuleError + */ + function testInvalidRules( $rules, $comment ) { + CLDRPluralRuleEvaluator::evaluate( 1, (array) $rules ); + } + + function validTestCases() { + $tests = array( + # expected, rule, number, comment + array( 0, 'n is 1', 1, 'integer number and is' ), + array( 0, 'n is 1', "1", 'string integer number and is' ), + array( 0, 'n is 1', 1.0, 'float number and is' ), + array( 0, 'n is 1', "1.0", 'string float number and is' ), + array( 1, 'n is 1', 1.1, 'float number and is' ), + array( 1, 'n is 1', 2, 'float number and is' ), + + array( 0, 'n in 1,3,5', 3, '' ), + array( 1, 'n not in 1,3,5', 5, '' ), + + array( 1, 'n in 1,3,5', 2, '' ), + array( 0, 'n not in 1,3,5', 4, '' ), + + array( 0, 'n in 1..3', 2, '' ), + array( 0, 'n in 1..3', 3, 'in is inclusive' ), + array( 1, 'n in 1..3', 0, '' ), + + array( 1, 'n not in 1..3', 2, '' ), + array( 1, 'n not in 1..3', 3, 'in is inclusive' ), + array( 0, 'n not in 1..3', 0, '' ), + + array( 1, 'n is not 1 and n is not 2 and n is not 3', 1, 'and relation' ), + array( 0, 'n is not 1 and n is not 2 and n is not 4', 3, 'and relation' ), + + array( 0, 'n is not 1 or n is 1', 1, 'or relation' ), + array( 1, 'n is 1 or n is 2', 3, 'or relation' ), + + array( 0, 'n is 1', 1, 'extra whitespace' ), + + array( 0, 'n mod 3 is 1', 7, 'mod' ), + array( 0, 'n mod 3 is not 1', 4.3, 'mod with floats' ), + + array( 0, 'n within 1..3', 2, 'within with integer' ), + array( 0, 'n within 1..3', 2.5, 'within with float' ), + array( 0, 'n in 1..3', 2, 'in with integer' ), + array( 1, 'n in 1..3', 2.5, 'in with float' 
), + + array( 0, 'n in 3 or n is 4 and n is 5', 3, 'and binds more tightly than or' ), + array( 1, 'n is 3 or n is 4 and n is 5', 4, 'and binds more tightly than or' ), + + array( 0, 'n mod 10 in 3..4,9 and n mod 100 not in 10..19,70..79,90..99', 24, 'breton rule' ), + array( 1, 'n mod 10 in 3..4,9 and n mod 100 not in 10..19,70..79,90..99', 25, 'breton rule' ), + + array( 0, 'n within 0..2 and n is not 2', 0, 'french rule' ), + array( 0, 'n within 0..2 and n is not 2', 1, 'french rule' ), + array( 0, 'n within 0..2 and n is not 2', 1.2, 'french rule' ), + array( 1, 'n within 0..2 and n is not 2', 2, 'french rule' ), + + array( 1, 'n in 3..10,13..19', 2, 'scottish rule - ranges with comma' ), + array( 0, 'n in 3..10,13..19', 4, 'scottish rule - ranges with comma' ), + array( 1, 'n in 3..10,13..19', 12.999, 'scottish rule - ranges with comma' ), + array( 0, 'n in 3..10,13..19', 13, 'scottish rule - ranges with comma' ), + + array( 0, '5 mod 3 is n', 2, 'n as result of mod - no need to pass' ), + ); + + return $tests; + } + + function invalidTestCases() { + $tests = array( + array( 'n mod mod 5 is 1', 'mod mod' ), + array( 'n', 'just n' ), + array( 'n is in 5', 'is in' ), + ); + return $tests; + } + +}