return $rxTc;
}
+ /**
+ * Utility method for converting a character sequence from bytes to Unicode.
+ *
+ * The primary use case is converting $wgLegalTitleChars to a sequence usable in
+ * javascript, as PHP uses UTF-8 bytes where javascript uses Unicode code units.
+ *
+ * The input is scanned one token at a time. A token is a single byte, a
+ * backslash-x escape with two hex digits, a backslash followed by three octal
+ * digits, or a backslash followed by any other byte. Tokens that decode to a
+ * byte below 0x80 are written back out: bytes below 32 and 0x7f as hex
+ * escapes, the bytes - \ [ ] ^ with a leading backslash, everything else as
+ * is. Tokens that decode to 0x80 or above are not written; instead a single
+ * '\u0080-\uFFFF' range is appended at the very end. A range whose end is
+ * 0x80 or above keeps only its part up to \x7F (if its start is below 0x80),
+ * and a range whose start is greater than its end is dropped.
+ *
+ * @param string $byteClass Contents of a byte-oriented character class
+ * @return string The converted contents, as built up in $out
+ */
+ public static function convertByteClassToUnicodeClass( $byteClass ) {
+ $length = strlen( $byteClass );
+ // Input token queue: raw text of the current (0), previous (1) and second-previous (2) token
+ $x0 = $x1 = $x2 = '';
+ // Decoded queue: the single byte each of those tokens stands for
+ $d0 = $d1 = $d2 = '';
+ // Decoded integer codepoints: ord() of the decoded byte for the same three tokens
+ $ord0 = $ord1 = $ord2 = 0;
+ // Re-encoded queue: how each token is to be written in the output
+ $r0 = $r1 = $r2 = '';
+ // Output
+ $out = '';
+ // Flags: becomes true once any byte >= 0x80 is seen, checked after the loop
+ $allowUnicode = false;
+ for ( $pos = 0; $pos < $length; $pos++ ) {
+ // Shift the queues down so that slot 0 is free for the token read in this iteration
+ $x2 = $x1;
+ $x1 = $x0;
+ $d2 = $d1; // NOTE: $d2 is assigned here but never read in this method
+ $d1 = $d0;
+ $ord2 = $ord1;
+ $ord1 = $ord0;
+ $r2 = $r1;
+ $r1 = $r0;
+ // Load the current input token and decoded values
+ $inChar = $byteClass[$pos];
+ if ( $inChar == '\\' ) {
+ if ( preg_match( '/x([0-9a-fA-F]{2})/A', $byteClass, $m, 0, $pos + 1 ) ) { // hex escape, matched anchored right after the backslash
+ $x0 = $inChar . $m[0];
+ $d0 = chr( hexdec( $m[1] ) );
+ $pos += strlen( $m[0] ); // lands on the last byte of the escape; the loop's $pos++ steps past it
+ } elseif ( preg_match( '/[0-7]{3}/A', $byteClass, $m, 0, $pos + 1 ) ) { // octal escape, exactly three digits
+ $x0 = $inChar . $m[0];
+ $d0 = chr( octdec( $m[0] ) );
+ $pos += strlen( $m[0] );
+ } elseif ( $pos + 1 >= $length ) { // backslash is the last byte of the input
+ $x0 = $d0 = '\\';
+ } else {
+ $d0 = $byteClass[$pos + 1]; // backslash followed by any other byte: that byte is taken literally
+ $x0 = $inChar . $d0;
+ $pos += 1;
+ }
+ } else {
+ $x0 = $d0 = $inChar;
+ }
+ $ord0 = ord( $d0 );
+ // Load the current re-encoded value
+ if ( $ord0 < 32 || $ord0 == 0x7f ) { // bytes below 32 and 0x7f are written as hex escapes
+ $r0 = sprintf( '\x%02x', $ord0 );
+ } elseif ( $ord0 >= 0x80 ) {
+ // Allow unicode if a single high-bit character appears
+ $r0 = sprintf( '\x%02x', $ord0 );
+ $allowUnicode = true;
+ } elseif ( strpos( '-\\[]^', $d0 ) !== false ) { // these five bytes get a leading backslash in the output
+ $r0 = '\\' . $d0;
+ } else {
+ $r0 = $d0;
+ }
+ // Do the output
+ if ( $x0 !== '' && $x1 === '-' && $x2 !== '' ) {
+ // Range: the raw previous token is a bare "-" (an escaped one would include its backslash) with a token on either side
+ if ( $ord2 > $ord0 ) {
+ // Empty range: the start is above the end, so nothing is written
+ } elseif ( $ord0 >= 0x80 ) {
+ // Unicode range: the end of the range is a high-bit byte
+ $allowUnicode = true;
+ if ( $ord2 < 0x80 ) {
+ // Keep the non-unicode section of the range
+ $out .= "$r2-\\x7F";
+ }
+ } else {
+ // Normal range
+ $out .= "$r2-$r0";
+ }
+ // Reset state to the initial value (only slots 0 and 1 of the x, d and r queues; the $ord values are left as they are)
+ $x0 = $x1 = $d0 = $d1 = $r0 = $r1 = '';
+ } elseif ( $ord2 < 0x80 ) {
+ // ASCII character: the three tokens do not form a range, so the one in slot 2 is written before it is shifted out
+ $out .= $r2;
+ }
+ }
+ if ( $ord1 < 0x80 ) { // after the loop, write out what is still held in slots 1 and 0
+ $out .= $r1;
+ }
+ if ( $ord0 < 0x80 ) {
+ $out .= $r0;
+ }
+ if ( $allowUnicode ) { // one range stands in for every high-bit byte that was seen
+ $out .= '\u0080-\uFFFF';
+ }
+ return $out;
+ }
+
/**
* Get a string representation of a title suitable for
* including in a search index
global $wgFileExtensions;
$data = array();
- foreach ( $wgFileExtensions as $ext ) {
+ foreach ( array_unique( $wgFileExtensions ) as $ext ) {
$data[] = array( 'ext' => $ext );
}
$this->getResult()->setIndexedTagName( $data, 'fe' );
case UploadBase::FILETYPE_BADTYPE:
$extradata = array(
'filetype' => $verification['finalExt'],
- 'allowed' => $wgFileExtensions
+ 'allowed' => array_values( array_unique( $wgFileExtensions ) )
);
$this->getResult()->setIndexedTagName( $extradata['allowed'], 'ext' );
'wgFormattedNamespaces' => $wgContLang->getFormattedNamespaces(),
'wgNamespaceIds' => $namespaceIds,
'wgSiteName' => $wgSitename,
- 'wgFileExtensions' => array_values( $wgFileExtensions ),
+ 'wgFileExtensions' => array_values( array_unique( $wgFileExtensions ) ),
'wgDBname' => $wgDBname,
// This sucks, it is only needed on Special:Upload, but I could
// not find a way to add vars only for a certain module
'wgCookiePrefix' => $wgCookiePrefix,
'wgResourceLoaderMaxQueryLength' => $wgResourceLoaderMaxQueryLength,
'wgCaseSensitiveNamespaces' => $caseSensitiveNamespaces,
+ 'wgLegalTitleChars' => Title::convertByteClassToUnicodeClass( Title::legalChars() ),
);
wfRunHooks( 'ResourceLoaderGetConfigVars', array( &$vars ) );
} else {
$msg->params( $details['finalExt'] );
}
- $msg->params( $this->getLanguage()->commaList( $wgFileExtensions ),
- count( $wgFileExtensions ) );
+ $extensions = array_unique( $wgFileExtensions );
+ $msg->params( $this->getLanguage()->commaList( $extensions ),
+ count( $extensions ) );
// Add PLURAL support for the first parameter. This results
// in a bit unlogical parameter sequence, but does not break
# Everything not permitted is banned
$extensionsList =
'<div id="mw-upload-permitted">' .
- $this->msg( 'upload-permitted', $this->getContext()->getLanguage()->commaList( $wgFileExtensions ) )->parseAsBlock() .
+ $this->msg( 'upload-permitted', $this->getContext()->getLanguage()->commaList( array_unique( $wgFileExtensions ) ) )->parseAsBlock() .
"</div>\n";
} else {
# We have to list both preferred and prohibited
$extensionsList =
'<div id="mw-upload-preferred">' .
- $this->msg( 'upload-preferred', $this->getContext()->getLanguage()->commaList( $wgFileExtensions ) )->parseAsBlock() .
+ $this->msg( 'upload-preferred', $this->getContext()->getLanguage()->commaList( array_unique( $wgFileExtensions ) ) )->parseAsBlock() .
"</div>\n" .
'<div id="mw-upload-prohibited">' .
- $this->msg( 'upload-prohibited', $this->getContext()->getLanguage()->commaList( $wgFileBlacklist ) )->parseAsBlock() .
+ $this->msg( 'upload-prohibited', $this->getContext()->getLanguage()->commaList( array_unique( $wgFileBlacklist ) ) )->parseAsBlock() .
"</div>\n";
}
} else {
// Check whether the file extension is on the unwanted list
global $wgCheckFileExtensions, $wgFileExtensions;
if ( $wgCheckFileExtensions ) {
- if ( !$this->checkFileExtension( $this->mFinalExtension, $wgFileExtensions ) ) {
+ $extensions = array_unique( $wgFileExtensions );
+ if ( !$this->checkFileExtension( $this->mFinalExtension, $extensions ) ) {
$warnings['filetype-unwanted-type'] = array( $this->mFinalExtension,
- $wgLang->commaList( $wgFileExtensions ), count( $wgFileExtensions ) );
+ $wgLang->commaList( $extensions ), count( $extensions ) );
}
}
}
}
+ public static function provideConvertByteClassToUnicodeClass() { // data provider for testConvertByteClassToUnicodeClass
+ return array( // each case is array( byte class passed in, expected Unicode class )
+ array( // long class mixing literals, escaped characters, ASCII ranges and a \x80-\xFF range
+ ' %!"$&\'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+',
+ ' %!"$&\'()*,\\-./0-9:;=?@A-Z\\\\\\^_`a-z~+\\u0080-\\uFFFF',
+ ),
+ array( // range running from ASCII into high bytes is cut off at \x7F
+ 'QWERTYf-\\xFF+',
+ 'QWERTYf-\\x7F+\\u0080-\\uFFFF',
+ ),
+ array( // same idea with the range start written as a hex escape
+ 'QWERTY\\x66-\\xFD+',
+ 'QWERTYf-\\x7F+\\u0080-\\uFFFF',
+ ),
+ array( // pure ASCII range comes back unchanged
+ 'QWERTYf-y+',
+ 'QWERTYf-y+',
+ ),
+ array( // range ending exactly at \x80
+ 'QWERTYf-\\x80+',
+ 'QWERTYf-\\x7F+\\u0080-\\uFFFF',
+ ),
+ array( // hex-escaped ASCII byte after the range comes out as the literal character
+ 'QWERTY\\x66-\\x80+\\x23',
+ 'QWERTYf-\\x7F+#\\u0080-\\uFFFF',
+ ),
+ array( // lone high byte after the range is not written out
+ 'QWERTY\\x66-\\x80+\\xD3',
+ 'QWERTYf-\\x7F+\\u0080-\\uFFFF',
+ ),
+ array( // escaped backslash followed by a high-byte escape
+ '\\\\\\x99',
+ '\\\\\\u0080-\\uFFFF',
+ ),
+ array( // leading "-" is a literal, not a range
+ '-\\x99',
+ '\\-\\u0080-\\uFFFF',
+ ),
+ array( // escaped "-" is a literal, not a range
+ 'QWERTY\\-\\x99',
+ 'QWERTY\\-\\u0080-\\uFFFF',
+ ),
+ array( // escaped backslash followed by the plain text x99, so no high byte is involved
+ '\\\\x99',
+ '\\\\x99',
+ ),
+ array( // nothing but a range whose end is a high byte
+ 'A-\\x9F',
+ 'A-\\x7F\\u0080-\\uFFFF',
+ ),
+ array( // an ASCII range plus a range lying entirely in high bytes, which is not written out
+ '\\x66-\\x77QWERTY\\x88-\\x91FXZ',
+ 'f-wQWERTYFXZ\\u0080-\\uFFFF',
+ ),
+ array( // one range crossing into high bytes and one lying entirely in high bytes
+ '\\x66-\\x99QWERTY\\xAA-\\xEEFXZ',
+ 'f-\\x7FQWERTYFXZ\\u0080-\\uFFFF',
+ ),
+ );
+ }
+
+ /**
+ * Checks that Title::convertByteClassToUnicodeClass() returns exactly the
+ * expected string for each case supplied by the data provider.
+ *
+ * @dataProvider provideConvertByteClassToUnicodeClass
+ * @param string $byteClass Input handed to the method under test
+ * @param string $unicodeClass Exact string the method is expected to return
+ */
+ public function testConvertByteClassToUnicodeClass( $byteClass, $unicodeClass ) {
+ // assertSame compares with ===, so a loosely-equal result of another type or
+ // a numeric-looking string that merely compares equal cannot pass by accident
+ $this->assertSame( $unicodeClass, Title::convertByteClassToUnicodeClass( $byteClass ) );
+ }
+
/**
* @dataProvider provideBug31100
*/