Add duplicate check to the default checks: Now that messages may be marked as optional...
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
2 /**
3 * @ingroup MaintenanceLanguage
4 */
5
/**
 * Command-line front end that runs the message checks for one language (or
 * for every language) and prints the findings as plain text or wikitext.
 *
 * The checks themselves are methods of the object stored in $L; this class
 * only selects which of them to run and formats what they return.
 */
class CheckLanguageCLI {
	/** Language code to check when --all is not given. */
	protected $code = null;
	/** Display level, 0-3 (see help()). May be a string when it comes from the command line. */
	protected $level = 2;
	/** Whether message keys are printed as [[MediaWiki:...]] links. */
	protected $doLinks = false;
	/** Content language of the wiki the links are meant for. */
	protected $wikiCode = 'en';
	/** Whether every language returned by $L->getLanguages() is checked. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** Names of the checks to run, after whitelist/blacklist processing. */
	protected $checks = array();
	/** Object providing the message data and the check methods. */
	protected $L = null;

	/** Checks that run when neither whitelist nor blacklist is given. */
	protected $defaultChecks = array(
		'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
	);

	/** Check results, keyed by language code and then by check name. */
	protected $results = array();

	/** Whether EXIF messages are included; passed to the languages object. */
	private $includeExif = false;

	/**
	 * Read the command line options and create the languages object.
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array: command line options, keyed by option name
	 */
	public function __construct( Array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# The whitelist wins over the blacklist when both are given
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks,
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks;
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Map every check name to the method of $L that implements it.
	 *
	 * @return Array: check name => method name
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate' => 'getDuplicateMessages',
			'obsolete' => 'getObsoleteMessages',
			'variables' => 'getMessagesWithoutVariables',
			'plural' => 'getMessagesWithoutPlural',
			'empty' => 'getEmptyMessages',
			'whitespace' => 'getMessagesWithWhitespace',
			'xhtml' => 'getNonXHTMLMessages',
			'chars' => 'getMessagesWithWrongChars',
			'links' => 'getMessagesWithDubiousLinks',
			'unbalanced' => 'getMessagesWithUnbalanced',
		);
	}

	/**
	 * Headline templates for the plain text output. outputText() fills in
	 * $1 (number of findings), $2 (number of messages) and $3 (language code).
	 *
	 * @return Array: check name => headline template
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate' => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete' => '$1 message(s) of $2 do not exist in en or are in the ignore list, but are in $3',
			'variables' => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural' => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty' => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace' => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml' => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars' => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links' => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced' => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
		);
	}

	/**
	 * Usage text printed for the 'help' option.
	 *
	 * @return String
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
* output: Output format, either plain or wiki (default: plain).
Check codes (ideally, all of them should result 0; all the checks are executed by default, except the language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* plural: Messages which do not use plural while en uses it.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the level is 0, print the results in the
	 * selected output format.
	 *
	 * @throws MWException when the output format is neither 'plain' nor 'wiki'
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Fill $this->results for the selected language, or for every language
	 * when --all was given. 'en' and 'enRTL' are never checked.
	 *
	 * @throws MWException when a single language is requested and it is one
	 *  of the codes that cannot be checked
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
			return;
		}

		if ( in_array( $this->code, $ignoredCodes ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
	}

	/**
	 * Get the per-language list of checks that must be skipped.
	 *
	 * @return Array: language code => list of check names (the global
	 *  $checkBlacklist)
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run every selected check on one language.
	 *
	 * @param $code String: language code
	 * @return Array: check name => whatever the check method returned; a
	 *  check that is blacklisted for this language gets an empty array, and
	 *  at level 0 the whole result is an empty array
	 * @throws MWException when a selected check is not known
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only: just load the messages. The level is compared as
		# a string because it is a string when it comes from the command line.
		if ( (string)$this->level === '0' ) {
			$this->L->getMessages( $code );
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skipped = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skipped ) ) {
				# Keep an entry for the skipped check, so that every language
				# has one result per selected check
				$results[$check] = array();
				continue;
			}

			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output, as a wiki link when links are enabled.
	 *
	 * @param $key String: message key
	 * @param $code String: language code of the message
	 * @return String: the bare key, or a link to the MediaWiki: page (with
	 *  a /$code subpage unless $code is the wiki content language)
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[MediaWiki:$displayKey|$key]]";
		}
		return "[[MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Print the results as plain text: one headline per check that found
	 * something, followed by the affected messages as the level allows.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			$translatable = $this->L->getGeneralMessages();
			$translatable = count( $translatable['translatable'] );

			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				# Untranslated messages are counted against everything that
				# can be translated, the other checks against what is translated
				$replace = array( $count, $check == 'untranslated' ? $translatable : $translated, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					$displayKey = $this->formatKey( $key, $code );
					if ( $this->level == 2 ) {
						echo "* $displayKey\n";
					} else {
						echo "* $displayKey: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a sortable table with one row per
	 * language that has problems, followed by a section per language that
	 * lists the affected message keys of every check.
	 *
	 * Globals: $wgContLang, $IP
	 */
	public function outputWiki() {
		global $wgContLang, $IP;

		$detailText = '';
		$rows = array( '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks ) );

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();

			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					$numbers[] = $count;
					continue;
				}

				$problems += $count;
				$messageDetails = array();
				foreach ( array_keys( $messages ) as $key ) {
					$messageDetails[] = $this->formatKey( $key, $code );
				}
				$detailTextForLangChecks[] = "===$code-$check===\n* " . implode( ', ', $messageDetails );
				# The number links to the detail section of this check
				$numbers[] = "'''[[#$code-$check|$count]]'''";
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			# Don't list languages without problems
			if ( !$problems ) {
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear:both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Tell whether none of the checks found anything in any language.
	 *
	 * @return Boolean: false as soon as one check has a non-empty result
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}
319
/**
 * Variant of CheckLanguageCLI that checks the messages of extensions. The
 * selected checks are run once per extension message group, and the results
 * of each extension are printed right after it has been checked.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects, one per extension to check. */
	private $extensions;

	/**
	 * Read the command line options and collect the extensions to check.
	 * Prints the help text and exits when the 'help' option is set. The
	 * parent constructor is not called.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array: command line options, keyed by option name
	 * @param $extension String: 'all', 'wikimedia', or a comma separated
	 *  list of extension names (without the 'ext-' group prefix)
	 */
	public function __construct( Array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# The whitelist wins over the blacklist when both are given
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks,
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks;
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# The duplicate check can be forced back in; it is already one of the
		# default checks, so only add it when it is missing - otherwise it
		# would run twice and show up twice in the wiki table header
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks, true ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();

		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Usage text printed for the 'help' option.
	 *
	 * @return String
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Do not perform the following checks (form: code,code).
* duplicate: Check for messages which are translated the same to English even if the whitelist or blacklist left that check out.
* output: Output format, either plain or wiki (default: plain).
Check codes (ideally, all of them should result 0; all the checks are executed by default, except the language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* plural: Messages which do not use plural while en uses it.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. Nothing else is needed here, since checkLanguage()
	 * prints the results of every extension itself.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check one language in every selected extension and print the results
	 * of each extension that has any findings, headed by the extension name.
	 *
	 * Nothing is returned: $this->results is overwritten for every extension
	 * and printed immediately.
	 *
	 * @param $code String: language code
	 * @throws MWException when the output format is neither 'plain' nor 'wiki'
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			# The parent runs its checks against whatever is in $this->L
			$this->L = $extension;
			$extensionResults = parent::checkLanguage( $code );
			# Make sure isEmpty() always gets an array to iterate over, even
			# if the parent has nothing to report (syntax-only level)
			$this->results = array(
				$code => is_array( $extensionResults ) ? $extensionResults : array()
			);

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				switch ( $this->output ) {
					case 'plain':
						$this->outputText();
						break;
					case 'wiki':
						$this->outputWiki();
						break;
					default:
						throw new MWException( "Invalid output type $this->output" );
				}
			}

			echo "\n";
		}
	}
}
460
# Checks which are skipped for specific languages.
# Format: 'language code' => array( 'check1', 'check2', ... )
$checkBlacklist = array(
	'gan'          => array( 'plural' ),
	'gn'           => array( 'plural' ),
	'hak'          => array( 'plural' ),
	'hu'           => array( 'plural' ),
	# This language does not use plural
	'ja'           => array( 'plural' ),
	'ka'           => array( 'plural' ),
	'kk-arab'      => array( 'plural' ),
	'kk-cyrl'      => array( 'plural' ),
	'kk-latn'      => array( 'plural' ),
	'ko'           => array( 'plural' ),
	'mn'           => array( 'plural' ),
	'ms'           => array( 'plural' ),
	# This language uses zwnj a lot
	'my'           => array( 'chars' ),
	'sah'          => array( 'plural' ),
	'sq'           => array( 'plural' ),
	'tet'          => array( 'plural' ),
	'th'           => array( 'plural' ),
	'wuu'          => array( 'plural' ),
	'xmf'          => array( 'plural' ),
	'yue'          => array( 'plural' ),
	'zh'           => array( 'plural' ),
	'zh-classical' => array( 'plural' ),
	'zh-cn'        => array( 'plural' ),
	'zh-hans'      => array( 'plural' ),
	'zh-hant'      => array( 'plural' ),
	'zh-hk'        => array( 'plural' ),
	'zh-sg'        => array( 'plural' ),
	'zh-tw'        => array( 'plural' ),
	'zh-yue'       => array( 'plural' ),
);