* (T141960) Multi-valued parameters may now be separated using U+001F (Unit Separator)
instead of the pipe character. This will be useful if some of the multiple
values need to contain pipes, e.g. for action=options.
+* The API will now warn if input is not NFC-normalized Unicode or if it
+ contains invalid characters.
+* The 'normalized' list output by action=query and other modules that use
+ ApiPageSet may contain entries where the 'from' value is percent-encoded,
+ because the raw value cannot be represented in a valid API response. These
+ are indicated by a 'fromencoded' boolean alongside the existing 'from' field.
=== Action API internal changes in 1.28 ===
* Added a new hook, 'ApiMakeParserOptions', to allow extensions to better
);
}
}
+
+ // Check for NFC normalization, and warn
+ if ( $rawValue !== $value ) {
+ $this->handleParamNormalization( $paramName, $value, $rawValue );
+ }
}
if ( isset( $value ) && ( $multi || is_array( $type ) ) ) {
return $value;
}
+ /**
+ * Called when a parameter's normalized value differs from its raw input; this implementation only emits a warning.
+ * @since 1.28
+ * @param string $paramName Unprefixed parameter name
+ * @param string $value Input that will be used (not read by this implementation).
+ * @param string $rawValue Input before normalization (not read by this implementation).
+ */
+ protected function handleParamNormalization( $paramName, $value, $rawValue ) {
+ $encParamName = $this->encodeParamName( $paramName ); // name as reported in the warning; presumably adds the module prefix - confirm
+ $this->setWarning(
+ "The value passed for '$encParamName' contains invalid or non-normalized data. "
+ . 'Textual data should be valid, NFC-normalized Unicode without '
+ . 'C0 control characters other than HT (\\t), LF (\\n), and CR (\\r).'
+ );
+ }
+
/**
* Split a multi-valued parameter string, like explode()
* @since 1.28
* @since 1.21
*/
public function getNormalizedTitlesAsResult( $result = null ) {
+ global $wgContLang;
+
$values = [];
foreach ( $this->getNormalizedTitles() as $rawTitleStr => $titleStr ) {
+ $encode = ( $wgContLang->normalize( $rawTitleStr ) !== $rawTitleStr );
$values[] = [
- 'from' => $rawTitleStr,
+ 'fromencoded' => $encode,
+ 'from' => $encode ? rawurlencode( $rawTitleStr ) : $rawTitleStr,
'to' => $titleStr
];
}
return $result;
}
+ protected function handleParamNormalization( $paramName, $value, $rawValue ) { // override: also records raw => normalized 'titles' entries
+ parent::handleParamNormalization( $paramName, $value, $rawValue ); // run the parent's handling first
+
+ if ( $paramName === 'titles' ) {
+ // For the 'titles' parameter, we want to split it like ApiBase would
+ // and add any changed titles to $this->mNormalizedTitles
+ $value = $this->explodeMultiValue( $value, self::LIMIT_SML2 + 1 ); // second argument is presumably a piece limit, as with explode() - confirm
+ $l = count( $value );
+ $rawValue = $this->explodeMultiValue( $rawValue, $l ); // split the raw input using the normalized input's piece count as the limit
+ for ( $i = 0; $i < $l; $i++ ) {
+ if ( $value[$i] !== $rawValue[$i] ) { // NOTE(review): assumes $rawValue yields at least $l pieces - confirm
+ $this->mNormalizedTitles[$rawValue[$i]] = $value[$i]; // keyed by the raw title; the value is the normalized title
+ }
+ }
+ }
+ }
+
private static $generators = null;
/**
"api-help-param-deprecated": "Deprecated.",
"api-help-param-required": "This parameter is required.",
"api-help-datatypes-header": "Data types",
- "api-help-datatypes": "Some parameter types in API requests need further explanation:\n;boolean\n:Boolean parameters work like HTML checkboxes: if the parameter is specified, regardless of value, it is considered true. For a false value, omit the parameter entirely.\n;timestamp\n:Timestamps may be specified in several formats. ISO 8601 date and time is recommended. All times are in UTC, any included timezone is ignored.\n:* ISO 8601 date and time, <kbd><var>2001</var>-<var>01</var>-<var>15</var>T<var>14</var>:<var>56</var>:<var>00</var>Z</kbd> (punctuation and <kbd>Z</kbd> are optional)\n:* ISO 8601 date and time with (ignored) fractional seconds, <kbd><var>2001</var>-<var>01</var>-<var>15</var>T<var>14</var>:<var>56</var>:<var>00</var>.<var>00001</var>Z</kbd> (dashes, colons, and <kbd>Z</kbd> are optional)\n:* MediaWiki format, <kbd><var>2001</var><var>01</var><var>15</var><var>14</var><var>56</var><var>00</var></kbd>\n:* Generic numeric format, <kbd><var>2001</var>-<var>01</var>-<var>15</var> <var>14</var>:<var>56</var>:<var>00</var></kbd> (optional timezone of <kbd>GMT</kbd>, <kbd>+<var>##</var></kbd>, or <kbd>-<var>##</var></kbd> is ignored)\n:* EXIF format, <kbd><var>2001</var>:<var>01</var>:<var>15</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:*RFC 2822 format (timezone may be omitted), <kbd><var>Mon</var>, <var>15</var> <var>Jan</var> <var>2001</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:* RFC 850 format (timezone may be omitted), <kbd><var>Monday</var>, <var>15</var>-<var>Jan</var>-<var>2001</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:* C ctime format, <kbd><var>Mon</var> <var>Jan</var> <var>15</var> <var>14</var>:<var>56</var>:<var>00</var> <var>2001</var></kbd>\n:* Seconds since 1970-01-01T00:00:00Z as a 1 to 13 digit integer (excluding <kbd>0</kbd>)\n:* The string <kbd>now</kbd>\n;alternative multiple-value separator\n:Parameters that take multiple values are normally submitted with the values separated using the pipe 
character, e.g. <kbd>param=value1|value2</kbd> or <kbd>param=value1%7Cvalue2</kbd>. If a value must contain the pipe character, use U+001F (Unit Separator) as the separator ''and'' prefix the value with U+001F, e.g. <kbd>param=%1Fvalue1%1Fvalue2</kbd>.",
+ "api-help-datatypes": "Input to MediaWiki should be NFC-normalized UTF-8. MediaWiki may attempt to convert other input, but this may cause some operations (such as [[Special:ApiHelp/edit|edits]] with MD5 checks) to fail.\n\nSome parameter types in API requests need further explanation:\n;boolean\n:Boolean parameters work like HTML checkboxes: if the parameter is specified, regardless of value, it is considered true. For a false value, omit the parameter entirely.\n;timestamp\n:Timestamps may be specified in several formats. ISO 8601 date and time is recommended. All times are in UTC, any included timezone is ignored.\n:* ISO 8601 date and time, <kbd><var>2001</var>-<var>01</var>-<var>15</var>T<var>14</var>:<var>56</var>:<var>00</var>Z</kbd> (punctuation and <kbd>Z</kbd> are optional)\n:* ISO 8601 date and time with (ignored) fractional seconds, <kbd><var>2001</var>-<var>01</var>-<var>15</var>T<var>14</var>:<var>56</var>:<var>00</var>.<var>00001</var>Z</kbd> (dashes, colons, and <kbd>Z</kbd> are optional)\n:* MediaWiki format, <kbd><var>2001</var><var>01</var><var>15</var><var>14</var><var>56</var><var>00</var></kbd>\n:* Generic numeric format, <kbd><var>2001</var>-<var>01</var>-<var>15</var> <var>14</var>:<var>56</var>:<var>00</var></kbd> (optional timezone of <kbd>GMT</kbd>, <kbd>+<var>##</var></kbd>, or <kbd>-<var>##</var></kbd> is ignored)\n:* EXIF format, <kbd><var>2001</var>:<var>01</var>:<var>15</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:*RFC 2822 format (timezone may be omitted), <kbd><var>Mon</var>, <var>15</var> <var>Jan</var> <var>2001</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:* RFC 850 format (timezone may be omitted), <kbd><var>Monday</var>, <var>15</var>-<var>Jan</var>-<var>2001</var> <var>14</var>:<var>56</var>:<var>00</var></kbd>\n:* C ctime format, <kbd><var>Mon</var> <var>Jan</var> <var>15</var> <var>14</var>:<var>56</var>:<var>00</var> <var>2001</var></kbd>\n:* Seconds since 1970-01-01T00:00:00Z as a 1 to 13 digit 
integer (excluding <kbd>0</kbd>)\n:* The string <kbd>now</kbd>\n;alternative multiple-value separator\n:Parameters that take multiple values are normally submitted with the values separated using the pipe character, e.g. <kbd>param=value1|value2</kbd> or <kbd>param=value1%7Cvalue2</kbd>. If a value must contain the pipe character, use U+001F (Unit Separator) as the separator ''and'' prefix the value with U+001F, e.g. <kbd>param=%1Fvalue1%1Fvalue2</kbd>.",
"api-help-param-type-limit": "Type: integer or <kbd>max</kbd>",
"api-help-param-type-integer": "Type: {{PLURAL:$1|1=integer|2=list of integers}}",
"api-help-param-type-boolean": "Type: boolean ([[Special:ApiHelp/main#main/datatypes|details]])",
}
public static function provideGetParameterFromSettings() {
+ $warnings = [
+ 'The value passed for \'foo\' contains invalid or non-normalized data. Textual data should ' .
+ 'be valid, NFC-normalized Unicode without C0 control characters other than ' .
+ 'HT (\\t), LF (\\n), and CR (\\r).'
+ ];
+
$c0 = '';
$enc = '';
for ( $i = 0; $i < 32; $i++ ) {
return [
'Basic param' => [ 'bar', null, 'bar', [] ],
+ 'Basic param, C0 controls' => [ $c0, null, $enc, $warnings ],
'String param' => [ 'bar', '', 'bar', [] ],
'String param, defaulted' => [ null, '', '', [] ],
'String param, empty' => [ '', 'default', '', [] ],
$c0,
[ ApiBase::PARAM_ISMULTI => true ],
[ $enc ],
- []
+ $warnings
],
'Multi-valued parameter, other C0 controls (2)' => [
"\x1f" . $c0,
[ ApiBase::PARAM_ISMULTI => true ],
[ substr( $enc, 0, -3 ), '' ],
- []
+ $warnings
],
];
}
return [ $target, $pageSet ];
}
+
+ public function testHandleNormalization() { // checks that non-NFC 'titles' input is normalized and reported in the normalized list
+ $context = new RequestContext();
+ $context->setRequest( new FauxRequest( [ 'titles' => "a|B|a\xcc\x8a" ] ) ); // "\xcc\x8a" is UTF-8 for U+030A COMBINING RING ABOVE, so the third title is a decomposed (non-NFC) 'å'
+ $main = new ApiMain( $context );
+ $pageSet = new ApiPageSet( $main );
+ $pageSet->execute();
+
+ $this->assertSame(
+ [ 0 => [ 'A' => -1, 'B' => -2, 'Å' => -3 ] ], // all three titles are expected under key 0; the negative values are presumably fake IDs for missing pages - confirm
+ $pageSet->getAllTitlesByNamespace()
+ );
+ $this->assertSame(
+ [
+ [ 'fromencoded' => true, 'from' => 'a%CC%8A', 'to' => 'å' ], // raw decomposed input is reported percent-encoded and maps to the composed 'å'
+ [ 'fromencoded' => false, 'from' => 'a', 'to' => 'A' ], // ordinary title normalization, reported unencoded
+ [ 'fromencoded' => false, 'from' => 'å', 'to' => 'Å' ], // the composed 'å' is in turn normalized to the title 'Å'
+ ],
+ $pageSet->getNormalizedTitlesAsResult()
+ );
+ }
}
$this->assertEquals(
[
+ 'fromencoded' => false,
'from' => 'Project:articleA',
'to' => $to->getPrefixedText(),
],
$this->assertEquals(
[
+ 'fromencoded' => false,
'from' => 'article_B',
'to' => 'Article B'
],