* Handle fallbacks too in extension aliases
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
/**
 * Command-line driver that runs a configurable set of message checks on
 * one language (or on every language) and prints the findings either as
 * plain text or as wikitext.
 *
 * @ingroup MaintenanceLanguage
 */
class CheckLanguageCLI {
	/** Language code to check when not running in "all" mode. */
	protected $code = null;
	/** Display level (0-3), see help(). May be a string when taken from the options. */
	protected $level = 2;
	/** Whether message keys are printed as wiki links. */
	protected $doLinks = false;
	/** Code that formatKey() compares against to decide whether links get a /code suffix. */
	protected $wikiCode = 'en';
	/** Whether every language is checked instead of only $code. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** Check codes that will actually be run. */
	protected $checks = array();
	/** Object providing the check methods named in getChecks(). */
	protected $L = null;

	/** Check codes run when neither a whitelist nor a blacklist is given. */
	protected $defaultChecks = array(
		'untranslated', 'obsolete', 'variables', 'empty', 'plural',
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced'
	);

	/** Results of the last run: language code => ( check code => messages ). */
	protected $results = array();

	/** Passed to the languages constructor; false when the 'noexif' option is set. */
	private $includeExif = false;

	/**
	 * Read the command line options and create the checker object.
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array: parsed command line options.
	 */
	public function __construct( Array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# A whitelist takes precedence over a blacklist.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks,
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks;
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Map every known check code to the name of the method implementing
	 * it on the checker object ($this->L).
	 *
	 * @return Array: check code => method name.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithoutVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
		);
	}

	/**
	 * Headline templates used by outputText(), one per check code.
	 * $1 is the number of reported messages, $2 the total it is compared
	 * against and $3 the language code.
	 *
	 * @return Array: check code => headline template.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but are in $3',
			'variables'    => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
		);
	}

	/**
	 * @return String: usage text printed for the 'help' option.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except duplicate and language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the level is 0, print the results in the
	 * selected output format.
	 *
	 * @throws MWException if the output format is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Fill $this->results, either for every language reported by the
	 * checker object or for the single configured language.
	 * The codes 'en' and 'enRTL' are never checked.
	 *
	 * @throws MWException if the single configured language is an ignored code.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * GLOBALS: $checkBlacklist
	 *
	 * @return Array: language code => list of check codes to skip for it.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run every enabled check against one language.
	 *
	 * @param $code String: language code to check.
	 * @return Array: check code => messages reported by that check. A check
	 *  blacklisted for this language is recorded with an empty list, so
	 *  every enabled check has an entry; at level 0 the array is empty.
	 * @throws MWException if an enabled check has no callable implementation.
	 */
	protected function checkLanguage( $code ) {
		$results = array();

		# Syntax check only: loading the messages is all that is needed.
		# The level may come from the command line as a string, so normalise
		# it before the strict comparison.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				# Keep an (empty) entry so that the per-check columns of
				# outputWiki() stay aligned with its header row.
				$results[$check] = array();
				continue;
			}

			# Look the method up defensively: an unknown check code must end
			# in the exception below, not in an undefined index notice.
			$callback = isset( $checkFunctions[$check] )
				? array( $this->L, $checkFunctions[$check] )
				: null;
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for display.
	 *
	 * @param $key String: message key.
	 * @param $code String: language code the message belongs to.
	 * @return String: the bare key, or a [[MediaWiki:...]] link when links
	 *  are enabled (with a /$code suffix unless $code equals $this->wikiCode).
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[MediaWiki:$displayKey|$key]]";
			} else {
				return "[[MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Print the results as plain text: one headline per check that found
	 * something, followed by the affected messages as the level allows.
	 */
	protected function outputText() {
		# Neither of these depends on the language or the check.
		$descriptions = $this->getDescriptions();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			$translatable = $this->L->getGeneralMessages();
			$translatable = count( $translatable['translatable'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( $count ) {
					# Untranslated messages are counted against the
					# translatable total, everything else against the
					# number of translated messages.
					$replace = array( $count, $check == 'untranslated' ? $translatable : $translated, $code );
					echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
					if ( $this->level == 1 ) {
						echo "[messages are hidden]\n";
					} else {
						foreach ( $messages as $key => $value ) {
							$displayKey = $this->formatKey( $key, $code );
							if ( $this->level == 2 ) {
								echo "* $displayKey\n";
							} else {
								echo "* $displayKey: '$value'\n";
							}
						}
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a sortable summary table with one row
	 * per language that has problems, followed by per-language sections
	 * listing the affected message keys.
	 *
	 * Globals: $wgContLang, $IP
	 */
	public function outputWiki() {
		global $wgContLang, $IP;
		$detailText = '';
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$displayKey = $this->formatKey( $key, $code );
						$messageDetails[] = $displayKey;
					}
					$detailTextForLangChecks[] = "===$code-$check===\n* " . implode( ', ', $messageDetails );
					# The table cell links to the detail section added above.
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				continue; // Don't list languages without problems
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear:both;"
$tableRows
|}

$detailText

EOL;
	}
}
309
/**
 * Variant of CheckLanguageCLI that runs the checks on the message groups
 * of extensions, printing the results extension by extension.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects, one per selected extension. */
	private $extensions;

	/**
	 * Read the command line options and collect the extensions to check.
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array: parsed command line options.
	 * @param $extension String: 'all', 'wikimedia', or a comma-separated
	 *  list of extension names.
	 */
	public function __construct( Array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( !isset( $options['lang'] ) ) {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		} else {
			$this->code = $options['lang'];
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}
		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}
		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}
		$this->doLinks = isset( $options['links'] );

		# Start from the defaults; a whitelist wins over a blacklist.
		$this->checks = $this->defaultChecks;
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$skipped = explode( ',', $options['blacklist'] );
			$this->checks = array_diff( $this->defaultChecks, $skipped );
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premade = new PremadeMediawikiExtensionGroups();
		$premade->addAll();

		switch ( $extension ) {
			case 'all':
				# Every non-meta group whose id starts with "ext-".
				foreach ( MessageGroups::singleton()->getGroups() as $group ) {
					if ( strpos( $group->getId(), 'ext-' ) !== 0 || $group->isMeta() ) {
						continue;
					}
					$this->extensions[] = new extensionLanguages( $group );
				}
				break;

			case 'wikimedia':
				# The groups listed by the 'ext-0-wikimedia' group.
				$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
				foreach ( $wikimedia->wmfextensions() as $groupId ) {
					$this->extensions[] = new extensionLanguages( MessageGroups::getGroup( $groupId ) );
				}
				break;

			default:
				# Explicit comma-separated list of extension names.
				foreach ( explode( ',', $extension ) as $name ) {
					$group = MessageGroups::getGroup( 'ext-' . $name );
					if ( !$group ) {
						print "No such extension $name.\n";
						continue;
					}
					$this->extensions[] = new extensionLanguages( $group );
				}
		}
	}

	/**
	 * @return String: usage text printed for the 'help' option.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except duplicate and language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. Output is produced by checkLanguage(), one
	 * extension at a time.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every selected extension, printing each
	 * extension's name followed by its results (when the level is above 0).
	 * Returns nothing.
	 *
	 * @param $code String: language code to check.
	 * @throws MWException if the output format is neither 'plain' nor 'wiki'.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $current ) {
			echo $current->name(), ":\n";

			# The parent implementation runs its checks on $this->L.
			$this->L = $current;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->level > 0 ) {
				$this->printResults();
			}

			echo "\n";
		}
	}

	/**
	 * Print $this->results in the configured output format.
	 *
	 * @throws MWException if the output format is neither 'plain' nor 'wiki'.
	 */
	private function printResults() {
		$format = $this->output;
		if ( $format == 'plain' ) {
			$this->outputText();
		} elseif ( $format == 'wiki' ) {
			$this->outputWiki();
		} else {
			throw new MWException( "Invalid output type $this->output" );
		}
	}
}
448
# Checks that are skipped for particular languages, in the form
# 'code' => array( 'check1', 'check2' ... ).
# Every language listed here skips the 'plural' check ('ja', for one,
# does not use plural), with a single exception overridden below.
$checkBlacklist = array_fill_keys(
	array(
		'gan', 'hak', 'ja', 'ka',
		'kk-arab', 'kk-cyrl', 'kk-latn',
		'ko', 'mn', 'ms', 'my', 'sq', 'tet', 'th',
		'wuu', 'xmf', 'yue',
		'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant',
		'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue',
	),
	array( 'plural' )
);
# 'my' uses a lot zwnj, so it skips 'chars' instead of 'plural'.
# Assigning to the existing key keeps its position in the table.
$checkBlacklist['my'] = array( 'chars' );