Merge "Proper namespace handling for WikiImporter"
[lhc/web/wiklou.git] / tests / phpunit / includes / title / MediaWikiTitleCodecTest.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @license GPL 2+
20 * @author Daniel Kinzler
21 */
22
/**
 * Tests for MediaWikiTitleCodec: formatting titles as text and parsing title
 * text into TitleValue objects, using a mocked GenderCache.
 *
 * @covers MediaWikiTitleCodec
 *
 * @group Title
 * @group Database
 *        ^--- needed because the code under test still relies on global state
 *             (see setUp()). NOTE(review): the original remark was cut off
 *             after "global state in"; confirm which component was meant.
 */
class MediaWikiTitleCodecTest extends MediaWikiTestCase {

	/**
	 * Pins the global configuration used by these tests: English as content
	 * and user language, "Project" as meta namespace, capital links enabled,
	 * and 'localtestiw' registered as a local interwiki prefix.
	 *
	 * Also installs an InterwikiLoadPrefix hook that supplies data for the
	 * 'localtestiw' and 'remotetestiw' prefixes.
	 */
	protected function setUp() {
		parent::setUp();

		$this->setMwGlobals( array(
			'wgLanguageCode' => 'en',
			'wgContLang' => Language::factory( 'en' ),
			// User language
			'wgLang' => Language::factory( 'en' ),
			'wgAllowUserJs' => false,
			'wgDefaultLanguageVariant' => false,
			'wgMetaNamespace' => 'Project',
			'wgLocalInterwikis' => array( 'localtestiw' ),
			'wgCapitalLinks' => true,

			// NOTE: this is why global state is evil.
			// TODO: refactor access to the interwiki codes so it can be injected.
			'wgHooks' => array(
				'InterwikiLoadPrefix' => array(
					function ( $prefix, &$data ) {
						if ( $prefix === 'localtestiw' ) {
							$data = array( 'iw_url' => 'localtestiw' );
						} elseif ( $prefix === 'remotetestiw' ) {
							$data = array( 'iw_url' => 'remotetestiw' );
						}
						// Always false, also for unknown prefixes. Presumably this
						// stops the default interwiki lookup -- confirm against the
						// InterwikiLoadPrefix hook documentation.
						return false;
					}
				)
			)
		) );
	}

	/**
	 * Returns a mock GenderCache that will consider a user "female" if the
	 * first part of the user name ends with "a".
	 *
	 * @return GenderCache
	 */
	private function getGenderCache() {
		$genderCache = $this->getMockBuilder( 'GenderCache' )
			->disableOriginalConstructor()
			->getMock();

		$genderCache->expects( $this->any() )
			->method( 'getGenderOf' )
			->will( $this->returnCallback( function ( $userName ) {
				return preg_match( '/^[^- _]+a( |_|$)/u', $userName ) ? 'female' : 'male';
			} ) );

		return $genderCache;
	}

	/**
	 * Builds the codec under test for the given language, wired to the mock
	 * GenderCache from getGenderCache().
	 *
	 * @param string $lang Language code, e.g. 'en' or 'de'
	 *
	 * @return MediaWikiTitleCodec
	 */
	protected function makeCodec( $lang ) {
		$gender = $this->getGenderCache();
		$language = Language::factory( $lang );
		// The language object can come from a cache, which does not respect test settings.
		$language->resetNamespaces();
		return new MediaWikiTitleCodec( $language, $gender );
	}

	/**
	 * Data for testFormat().
	 *
	 * Each row: namespace (or false), text, fragment, language code,
	 * expected formatted title, and optionally the expected result after a
	 * parse/format round trip (defaults to the expected formatted title).
	 *
	 * @return array
	 */
	public static function provideFormat() {
		return array(
			array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ),
			array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier#stuff and so on' ),
			array( false, 'Hansi_Maier', '', 'en', 'Hansi Maier' ),
			array(
				NS_USER_TALK,
				'hansi__maier',
				'',
				'en',
				'User talk:hansi maier',
				'User talk:Hansi maier'
			),

			// getGenderCache() provides a mock that considers first
			// names ending in "a" to be female.
			array( NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ),
		);
	}

	/**
	 * Checks formatTitle() output, then parses that output again and checks
	 * that re-formatting the parsed title yields the normalized form.
	 *
	 * @dataProvider provideFormat
	 *
	 * @param int|bool $namespace
	 * @param string $text
	 * @param string $fragment
	 * @param string $lang
	 * @param string $expected
	 * @param string|null $normalized Expected round-trip result; null means
	 *        "same as $expected"
	 */
	public function testFormat( $namespace, $text, $fragment, $lang, $expected, $normalized = null ) {
		if ( $normalized === null ) {
			$normalized = $expected;
		}

		$codec = $this->makeCodec( $lang );
		$actual = $codec->formatTitle( $namespace, $text, $fragment );

		$this->assertEquals( $expected, $actual, 'formatted' );

		// test round trip
		$parsed = $codec->parseTitle( $actual, NS_MAIN );
		$actual2 = $codec->formatTitle(
			$parsed->getNamespace(),
			$parsed->getText(),
			$parsed->getFragment()
		);

		$this->assertEquals( $normalized, $actual2, 'normalized after round trip' );
	}

	/**
	 * Data for testGetText().
	 *
	 * Each row: namespace, DB key, fragment, language code, expected text.
	 *
	 * @return array
	 */
	public static function provideGetText() {
		return array(
			array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ),
			array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'Hansi Maier' ),
		);
	}

	/**
	 * @dataProvider provideGetText
	 *
	 * @param int $namespace
	 * @param string $dbkey
	 * @param string $fragment
	 * @param string $lang
	 * @param string $expected
	 */
	public function testGetText( $namespace, $dbkey, $fragment, $lang, $expected ) {
		$codec = $this->makeCodec( $lang );
		$title = new TitleValue( $namespace, $dbkey, $fragment );

		$actual = $codec->getText( $title );

		$this->assertEquals( $expected, $actual );
	}

	/**
	 * Data for testGetPrefixedText().
	 *
	 * Each row: namespace, DB key, fragment, language code, expected
	 * prefixed text.
	 *
	 * @return array
	 */
	public static function provideGetPrefixedText() {
		return array(
			array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ),
			array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier' ),

			// No capitalization or normalization is applied while formatting!
			array( NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ),

			// getGenderCache() provides a mock that considers first
			// names ending in "a" to be female.
			array( NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ),
		);
	}

	/**
	 * @dataProvider provideGetPrefixedText
	 *
	 * @param int $namespace
	 * @param string $dbkey
	 * @param string $fragment
	 * @param string $lang
	 * @param string $expected
	 */
	public function testGetPrefixedText( $namespace, $dbkey, $fragment, $lang, $expected ) {
		$codec = $this->makeCodec( $lang );
		$title = new TitleValue( $namespace, $dbkey, $fragment );

		$actual = $codec->getPrefixedText( $title );

		$this->assertEquals( $expected, $actual );
	}

	/**
	 * Data for testGetFullText().
	 *
	 * Each row: namespace, DB key, fragment, language code, expected full
	 * text.
	 *
	 * @return array
	 */
	public static function provideGetFullText() {
		return array(
			array( NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ),
			array( NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier#stuff and so on' ),

			// No capitalization or normalization is applied while formatting!
			array( NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ),
		);
	}

	/**
	 * @dataProvider provideGetFullText
	 *
	 * @param int $namespace
	 * @param string $dbkey
	 * @param string $fragment
	 * @param string $lang
	 * @param string $expected
	 */
	public function testGetFullText( $namespace, $dbkey, $fragment, $lang, $expected ) {
		$codec = $this->makeCodec( $lang );
		$title = new TitleValue( $namespace, $dbkey, $fragment );

		$actual = $codec->getFullText( $title );

		$this->assertEquals( $expected, $actual );
	}

	/**
	 * Data for testParseTitle().
	 *
	 * Each row: input text, default namespace, language code, and optionally
	 * the expected result. The expected result may be a TitleValue, a string
	 * (taken as a DB key in NS_MAIN), or omitted (derived from the input text
	 * by testParseTitle()).
	 *
	 * @return array
	 */
	public static function provideParseTitle() {
		//TODO: test capitalization and trimming
		//TODO: test unicode normalization

		return array(
			array( ' : Hansi_Maier _ ', NS_MAIN, 'en',
				new TitleValue( NS_MAIN, 'Hansi_Maier', '' ) ),
			array( 'User:::1', NS_MAIN, 'de',
				new TitleValue( NS_USER, '0:0:0:0:0:0:0:1', '' ) ),
			array( ' lisa Müller', NS_USER, 'de',
				new TitleValue( NS_USER, 'Lisa_Müller', '' ) ),
			array( 'benutzerin:lisa Müller#stuff', NS_MAIN, 'de',
				new TitleValue( NS_USER, 'Lisa_Müller', 'stuff' ) ),

			array( ':Category:Quux', NS_MAIN, 'en',
				new TitleValue( NS_CATEGORY, 'Quux', '' ) ),
			array( 'Category:Quux', NS_MAIN, 'en',
				new TitleValue( NS_CATEGORY, 'Quux', '' ) ),
			array( 'Category:Quux', NS_CATEGORY, 'en',
				new TitleValue( NS_CATEGORY, 'Quux', '' ) ),
			array( 'Quux', NS_CATEGORY, 'en',
				new TitleValue( NS_CATEGORY, 'Quux', '' ) ),
			array( ':Quux', NS_CATEGORY, 'en',
				new TitleValue( NS_MAIN, 'Quux', '' ) ),

			// Surrounding whitespace/underscores and inner runs of them
			// end up as single underscores in the DB key.
			array( 'a b c', NS_MAIN, 'en',
				new TitleValue( NS_MAIN, 'A_b_c' ) ),
			array( ' a b c ', NS_MAIN, 'en',
				new TitleValue( NS_MAIN, 'A_b_c' ) ),
			array( ' _ Foo __ Bar_ _', NS_MAIN, 'en',
				new TitleValue( NS_MAIN, 'Foo_Bar' ) ),

			//NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
			array( 'Sandbox', NS_MAIN, 'en', ),
			array( 'A "B"', NS_MAIN, 'en', ),
			array( 'A \'B\'', NS_MAIN, 'en', ),
			array( '.com', NS_MAIN, 'en', ),
			array( '~', NS_MAIN, 'en', ),
			array( '"', NS_MAIN, 'en', ),
			array( '\'', NS_MAIN, 'en', ),

			array( 'Talk:Sandbox', NS_MAIN, 'en',
				new TitleValue( NS_TALK, 'Sandbox' ) ),
			array( 'Talk:Foo:Sandbox', NS_MAIN, 'en',
				new TitleValue( NS_TALK, 'Foo:Sandbox' ) ),
			array( 'File:Example.svg', NS_MAIN, 'en',
				new TitleValue( NS_FILE, 'Example.svg' ) ),
			array( 'File_talk:Example.svg', NS_MAIN, 'en',
				new TitleValue( NS_FILE_TALK, 'Example.svg' ) ),
			array( 'Foo/.../Sandbox', NS_MAIN, 'en',
				'Foo/.../Sandbox' ),
			array( 'Sandbox/...', NS_MAIN, 'en',
				'Sandbox/...' ),
			array( 'A~~', NS_MAIN, 'en',
				'A~~' ),
			// Length is 256 total, but only title part matters
			array( 'Category:' . str_repeat( 'x', 248 ), NS_MAIN, 'en',
				new TitleValue( NS_CATEGORY,
					'X' . str_repeat( 'x', 247 ) ) ),
			array( str_repeat( 'x', 252 ), NS_MAIN, 'en',
				'X' . str_repeat( 'x', 251 ) )
		);
	}

	/**
	 * Parses $text and compares the result with the expected TitleValue.
	 *
	 * @dataProvider provideParseTitle
	 *
	 * @param string $text Title text to parse
	 * @param int $ns Default namespace passed to parseTitle()
	 * @param string $lang Language code for the codec
	 * @param TitleValue|string|null $title Expected result. A string is used
	 *        as the DB key of a TitleValue in NS_MAIN; null means the DB key
	 *        is the trimmed input text with spaces replaced by underscores.
	 */
	public function testParseTitle( $text, $ns, $lang, $title = null ) {
		if ( $title === null ) {
			$title = str_replace( ' ', '_', trim( $text ) );
		}

		if ( is_string( $title ) ) {
			$title = new TitleValue( NS_MAIN, $title, '' );
		}

		$codec = $this->makeCodec( $lang );
		$actual = $codec->parseTitle( $text, $ns );

		$this->assertEquals( $title, $actual );
	}

	/**
	 * Data for testParseTitle_invalid().
	 *
	 * Each row holds a single title text that is expected to be rejected.
	 *
	 * @return array
	 */
	public static function provideParseTitle_invalid() {
		//TODO: test unicode errors

		return array(
			array( '#' ),
			array( '::' ),
			array( '::xx' ),
			array( '::##' ),
			array( ' :: x' ),

			array( 'Talk:File:Foo.jpg' ),
			array( 'Talk:localtestiw:Foo' ),
			array( 'remotetestiw:Foo' ),
			array( '::1' ), // only valid in user namespace
			array( 'User::x' ), // leading ":" in a user name is only valid for IPv6 addresses

			//NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
			array( '' ),
			array( ':' ),
			array( '__ __' ),
			array( ' __ ' ),
			// Bad characters forbidden regardless of wgLegalTitleChars
			array( 'A [ B' ),
			array( 'A ] B' ),
			array( 'A { B' ),
			array( 'A } B' ),
			array( 'A < B' ),
			array( 'A > B' ),
			array( 'A | B' ),
			// URL encoding
			array( 'A%20B' ),
			array( 'A%23B' ),
			array( 'A%2523B' ),
			// XML/HTML character entity references
			// Note: Commented out because they are not marked invalid by the PHP test as
			// Title::newFromText runs Sanitizer::decodeCharReferencesAndNormalize first.
			//array( 'A &eacute; B' ),
			//array( 'A &#233; B' ),
			//array( 'A &#x00E9; B' ),
			// Subject of NS_TALK does not roundtrip to NS_MAIN
			array( 'Talk:File:Example.svg' ),
			// Directory navigation
			array( '.' ),
			array( '..' ),
			array( './Sandbox' ),
			array( '../Sandbox' ),
			array( 'Foo/./Sandbox' ),
			array( 'Foo/../Sandbox' ),
			array( 'Sandbox/.' ),
			array( 'Sandbox/..' ),
			// Tilde
			array( 'A ~~~ Name' ),
			array( 'A ~~~~ Signature' ),
			array( 'A ~~~~~ Timestamp' ),
			array( str_repeat( 'x', 256 ) ),
			// Namespace prefix without actual title
			array( 'Talk:' ),
			array( 'Category: ' ),
			array( 'Category: #bar' )
		);
	}

	/**
	 * Expects parseTitle() to throw MalformedTitleException for $text when
	 * parsed with the English codec and NS_MAIN as default namespace.
	 *
	 * @dataProvider provideParseTitle_invalid
	 *
	 * @param string $text
	 */
	public function testParseTitle_invalid( $text ) {
		$this->setExpectedException( 'MalformedTitleException' );

		$codec = $this->makeCodec( 'en' );
		$codec->parseTitle( $text, NS_MAIN );
	}

	/**
	 * Data for testGetNamespaceName().
	 *
	 * Each row: namespace, title text, language code, expected namespace
	 * name.
	 *
	 * @return array
	 */
	public static function provideGetNamespaceName() {
		return array(
			array( NS_MAIN, 'Foo', 'en', '' ),
			array( NS_USER, 'Foo', 'en', 'User' ),
			array( NS_USER, 'Hansi Maier', 'de', 'Benutzer' ),

			// getGenderCache() provides a mock that considers first
			// names ending in "a" to be female.
			array( NS_USER, 'Lisa Müller', 'de', 'Benutzerin' ),
		);
	}

	/**
	 * @dataProvider provideGetNamespaceName
	 *
	 * @param int $namespace
	 * @param string $text
	 * @param string $lang
	 * @param string $expected
	 */
	public function testGetNamespaceName( $namespace, $text, $lang, $expected ) {
		$codec = $this->makeCodec( $lang );
		$name = $codec->getNamespaceName( $namespace, $text );

		$this->assertEquals( $expected, $name );
	}
}