3 * @ingroup MaintenanceLanguage
6 class CheckLanguageCLI {
/** Language code to check; the constructor takes it from the 'lang' option or $wgLanguageCode. */
protected $code = null;
/** Display level, 0 to 3; the help text documents 2 as the default. */
protected $level = 2;
/** Whether formatKey() turns message keys into wiki links. */
protected $doLinks = false;
/** Language code of the target wiki; keys in this language are linked without a "/code" suffix. */
protected $wikiCode = 'en';
/** Whether all languages are checked instead of only $code. */
protected $checkAll = false;
/** Name of the output format; execute() throws on a value it does not know. */
protected $output = 'plain';
/** Names of the checks to run. */
protected $checks = array();
/** Object the checks are called on and the message data is read from; assigned in the constructor. */
protected $L = null;

/** Check results, keyed by language code and then by check name. */
protected $results = array();

/** Whether EXIF messages are included; handed to the languages constructor. */
private $includeExif = false;
/**
 * Constructor: copy the script options into the object state and create the
 * languages object the checks run against.
 *
 * @param $options Options for script.
 */
public function __construct( Array $options ) {
	if ( isset( $options['help'] ) ) {
		// NOTE(review): the statements of this branch are missing from the reviewed
		// listing; printing the help text and stopping is assumed - confirm the exit status.
		echo $this->help();
		exit( 1 );
	}

	if ( isset( $options['lang'] ) ) {
		$this->code = $options['lang'];
	} else {
		// Fall back to the language the wiki is configured with
		global $wgLanguageCode;
		$this->code = $wgLanguageCode;
	}

	if ( isset( $options['level'] ) ) {
		// Store an integer: checkLanguage() compares the level strictly against 0,
		// which a string such as '0' would never satisfy.
		$this->level = (int)$options['level'];
	}

	// Pure switches: only their presence matters
	$this->doLinks = isset( $options['links'] );
	$this->includeExif = !isset( $options['noexif'] );
	$this->checkAll = isset( $options['all'] );

	if ( isset( $options['wikilang'] ) ) {
		$this->wikiCode = $options['wikilang'];
	}

	// A whitelist replaces the default checks, a blacklist subtracts from them
	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} elseif ( isset( $options['blacklist'] ) ) {
		$this->checks = array_diff(
			$this->defaultChecks(),
			explode( ',', $options['blacklist'] )
		);
	} else {
		$this->checks = $this->defaultChecks();
	}

	if ( isset( $options['output'] ) ) {
		$this->output = $options['output'];
	}

	$this->L = new languages( $this->includeExif );
}
/**
 * Get the default checks, i.e. the ones used when no whitelist is given.
 * @return A list of the default checks.
 */
protected function defaultChecks() {
	// Checks that look at the messages themselves
	$messageChecks = array(
		'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
	);
	// Checks on the other localisation data
	$otherChecks = array(
		'namespace', 'projecttalk', 'skin', 'magic', 'magic-over', 'magic-case',
		'special',
	);

	// Both are plain lists, so merging keeps the order and renumbers from 0
	return array_merge( $messageChecks, $otherChecks );
}
/**
 * Get the non-message checks: the checks whose result keys are not message
 * keys. The output methods use this list to decide whether a key may be
 * formatted as a message link.
 * @return A list of the non-message checks.
 */
protected function nonMessageChecks() {
	return array(
		'namespace', 'projecttalk', 'skin', 'magic', 'magic-over', 'magic-case',
		// Special page aliases are not messages either
		'special',
	);
}
/**
 * Get all checks.
 * @return An array of all check names mapped to their function names.
 */
protected function getChecks() {
	// Checks on message texts
	$messageChecks = array(
		'untranslated' => 'getUntranslatedMessages',
		'duplicate'    => 'getDuplicateMessages',
		'obsolete'     => 'getObsoleteMessages',
		'variables'    => 'getMessagesWithoutVariables',
		'plural'       => 'getMessagesWithoutPlural',
		'empty'        => 'getEmptyMessages',
		'whitespace'   => 'getMessagesWithWhitespace',
		'xhtml'        => 'getNonXHTMLMessages',
		'chars'        => 'getMessagesWithWrongChars',
		'links'        => 'getMessagesWithDubiousLinks',
		'unbalanced'   => 'getMessagesWithUnbalanced',
	);

	// Checks on namespaces, skins, magic words and special pages
	$otherChecks = array(
		'namespace'   => 'getUntranslatedNamespaces',
		'projecttalk' => 'getProblematicProjectTalks',
		'skin'        => 'getUntranslatedSkins',
		'magic'       => 'getUntranslatedMagicWords',
		'magic-over'  => 'getOverridingMagicWords',
		'magic-case'  => 'getCaseMismatchMagicWords',
		// NOTE(review): "Untraslated" looks like a typo, but the name has to match the
		// method on the object the checks are called on - confirm before renaming.
		'special'     => 'getUntraslatedSpecialPages',
	);

	// The key sets are disjoint, so the union simply appends the second map
	return $messageChecks + $otherChecks;
}
/**
 * Get all check descriptions. In each description $1, $2 and $3 are
 * placeholders that outputText() replaces with the number of hits, the total
 * and the language code.
 * @return An array of all check names mapped to their descriptions.
 */
protected function getDescriptions() {
	$descriptions = array();

	// Message checks
	$descriptions['untranslated'] = '$1 message(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['duplicate'] = '$1 message(s) of $2 are translated the same in en and $3:';
	$descriptions['obsolete'] = '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:';
	$descriptions['variables'] = '$1 message(s) of $2 in $3 don\'t use some variables that en uses:';
	$descriptions['plural'] = '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:';
	$descriptions['empty'] = '$1 message(s) of $2 in $3 are empty or -:';
	$descriptions['whitespace'] = '$1 message(s) of $2 in $3 have trailing whitespace:';
	$descriptions['xhtml'] = '$1 message(s) of $2 in $3 contain illegal XHTML:';
	$descriptions['chars'] = '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:';
	$descriptions['links'] = '$1 message(s) of $2 in $3 have problematic link(s):';
	$descriptions['unbalanced'] = '$1 message(s) of $2 in $3 have unbalanced {[]}:';

	// Non-message checks
	$descriptions['namespace'] = '$1 namespace name(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['projecttalk'] = '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:';
	$descriptions['skin'] = '$1 skin name(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['magic'] = '$1 magic word(s) of $2 are not translated to $3, but exist in en:';
	$descriptions['magic-over'] = '$1 magic word(s) of $2 in $3 do not contain the original en word(s):';
	$descriptions['magic-case'] = '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:';
	$descriptions['special'] = '$1 special page alias(es) of $2 are not translated to $3, but exist in en:';

	return $descriptions;
}
146 * @return The help string.
// Usage text of the language checker. Everything after the signature line is the help
// text itself, listing the command line parameters, the check codes and the display
// levels; any edit to those lines changes what the user is shown.
// NOTE(review): the lines that open and close the text literal are not part of this
// listing - confirm the delimiters before touching the block.
148 protected function help() {
150 Run this script to check a specific language file, or all of them.
151 Command line settings are in form --parameter[=value].
153 * lang: Language code (default: the installation default language).
154 * all: Check all customized languages.
155 * help: Show this help.
156 * level: Show the following display level (default: 2).
157 * links: Link the message values (default off).
158 * wikilang: For the links, what is the content language of the wiki to display the output in (default en).
159 * whitelist: Do only the following checks (form: code,code).
160 * blacklist: Don't do the following checks (form: code,code).
161 * noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
162 Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
163 * untranslated: Messages which are required to translate, but are not translated.
164 * duplicate: Messages which translation equal to fallback
165 * obsolete: Messages which are untranslatable, but translated.
166 * variables: Messages without variables which should be used.
167 * empty: Empty messages and messages that contain only -.
168 * whitespace: Messages which have trailing whitespace.
169 * xhtml: Messages which are not well-formed XHTML (checks only few common errors).
170 * chars: Messages with hidden characters.
171 * links: Messages which contains broken links to pages (does not find all).
172 * unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
173 * namespace: Namespace names that were not translated.
174 * projecttalk: Namespace names and aliases where the project talk does not contain $1.
175 * skin: Skin names that were not translated.
176 * magic: Magic words that were not translated.
177 * magic-over: Magic words that override the original English word.
178 * magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
179 * special: Special page names that were not translated.
180 Display levels (default: 2):
181 * 0: Skip the checks (useful for checking syntax).
182 * 1: Show only the stub headers and number of wrong messages, without list of messages.
183 * 2: Show only the headers and the message keys, without the message values.
184 * 3: Show both the headers and the complete messages, with both keys and values.
/**
 * Execute the script: run the checks and, unless the display level is 0,
 * print the results in the selected output format.
 */
public function execute() {
	$this->doChecks();

	if ( !( $this->level > 0 ) ) {
		// Nothing is displayed at level 0
		return;
	}

	// NOTE(review): the branch labels are missing from the reviewed listing; 'plain' and
	// 'wiki' are taken from the default of $output and the two output methods - confirm.
	// Loose comparison on purpose, to match what a switch statement does.
	if ( $this->output == 'plain' ) {
		$this->outputText();
	} elseif ( $this->output == 'wiki' ) {
		$this->outputWiki();
	} else {
		throw new MWException( "Invalid output type $this->output" );
	}
}
/**
 * Execute the checks and store the outcome in $this->results, keyed by
 * language code. English ('en', 'enRTL') is never checked.
 */
protected function doChecks() {
	$skippedCodes = array( 'en', 'enRTL' );

	$this->results = array();

	if ( !$this->checkAll ) {
		// Single language mode: asking for a skipped code is an error
		if ( in_array( $this->code, $skippedCodes ) ) {
			throw new MWException( "Cannot check code $this->code." );
		}
		$this->results[$this->code] = $this->checkLanguage( $this->code );
		return;
	}

	// All languages mode: skipped codes are silently left out
	foreach ( $this->L->getLanguages() as $language ) {
		if ( in_array( $language, $skippedCodes ) ) {
			continue;
		}
		$this->results[$language] = $this->checkLanguage( $language );
	}
}
/**
 * Get the check blacklist from the global $checkBlacklist.
 * @return The list of checks which should not be executed, as an array
 *   mapping language codes to lists of check names. An empty array is
 *   returned when the global is not set or is not an array.
 */
protected function getCheckBlacklist() {
	global $checkBlacklist;

	// "global" creates the variable as null when it does not exist yet; hand the
	// callers the documented array type in that case.
	return is_array( $checkBlacklist ) ? $checkBlacklist : array();
}
/**
 * Check a language: run every selected check that is not blacklisted for it.
 * @param $code The language code.
 * @return The results: an array mapping each selected check name to what the
 *   check returned. Blacklisted checks map to an empty array. At display
 *   level 0 the messages are only loaded and the array is empty.
 */
protected function checkLanguage( $code ) {
	# Syntax check only. The level may still be the string it was given as on the
	# command line, so it is compared by value; a level that was never set does
	# not count as 0.
	if ( is_numeric( $this->level ) && (int)$this->level === 0 ) {
		$this->L->getMessages( $code );
		return array();
	}

	$results = array();
	$checkFunctions = $this->getChecks();
	$checkBlacklist = $this->getCheckBlacklist();
	$skippedChecks = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

	foreach ( $this->checks as $check ) {
		if ( in_array( $check, $skippedChecks ) ) {
			# Record the skipped check with no hits, so that every selected check
			# has an entry in the returned array
			$results[$check] = array();
			continue;
		}

		# Reject unknown names before indexing, to get the exception instead of
		# an undefined index notice
		if ( !isset( $checkFunctions[$check] ) ) {
			throw new MWException( "Unknown check $check." );
		}

		$callback = array( $this->L, $checkFunctions[$check] );
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Unknown check $check." );
		}
		$results[$check] = call_user_func( $callback, $code );
	}

	return $results;
}
/**
 * Format a message key. With links enabled the key becomes a wiki link to
 * its MediaWiki: page, on the "/code" subpage unless the language is the one
 * of the target wiki. Without links the key is returned unchanged.
 * @param $key The message key.
 * @param $code The language code.
 * @return The formatted message key.
 */
protected function formatKey( $key, $code ) {
	if ( !$this->doLinks ) {
		return $key;
	}

	// Link target: the key with an upper case first letter
	$target = 'MediaWiki:' . ucfirst( $key );
	if ( $code != $this->wikiCode ) {
		$target .= "/$code";
	}

	return "[[$target|$key]]";
}
292 * Output the checks results as plain text.
293 * @return The checks results as plain text.
// Echo the check results of every language in $this->results as plain text: one
// description line per check, followed by the affected keys.
// NOTE(review): several lines of this method (branch labels, closing braces) are not
// part of this listing; the comments below describe the visible statements only.
295 protected function outputText() {
296 foreach ( $this->results as $code => $results ) {
// Number of entries under 'translated' for this language; used as $total further down
297 $translated = $this->L->getMessages( $code );
298 $translated = count( $translated['translated'] );
299 foreach ( $results as $check => $messages ) {
// Number of hits of this check ($1 in the description)
300 $count = count( $messages );
// The following assignments each pick the total ($2 in the description) from a different
// source. NOTE(review): the conditions selecting between them are not visible here -
// presumably one branch per check name; verify.
304 $translatable = $this->L->getGeneralMessages();
305 $total = count( $translatable['translatable'] );
308 $total = count( $this->L->getNamespaceNames( 'en' ) );
314 $total = count( $this->L->getSkinNames( 'en' ) );
317 $total = count( $this->L->getMagicWords( 'en' ) );
321 $total = count( $this->L->getMagicWords( $code ) );
324 $total = count( $this->L->getSpecialPageAliases( 'en' ) );
327 $total = $translated;
// Fill the placeholders of the check description and print it between blank lines
329 $search = array( '$1', '$2', '$3' );
330 $replace = array( $count, $total, $code );
331 $descriptions = $this->getDescriptions();
332 echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
// Level 1 prints a fixed placeholder line instead of the list
333 if ( $this->level == 1 ) {
334 echo "[messages are hidden]\n";
336 foreach ( $messages as $key => $value ) {
// Only keys of message checks go through formatKey()
337 if( !in_array( $check, $this->nonMessageChecks() ) ) {
338 $key = $this->formatKey( $key, $code );
// Level 2 and empty values take the first branch; the line below prints key and value
340 if ( $this->level == 2 || empty( $value ) ) {
343 echo "* $key: '$value'\n";
353 * Output the checks results as wiki text.
354 * @return The checks results as wiki text.
// Build the wiki text report: a table with one row per language plus a detail section
// per language, using the content language object and the install path ($IP) globals.
// NOTE(review): several lines of this method (initialisations, branch bodies, closing
// braces and the lines framing the final text block) are not part of this listing; the
// comments below describe the visible statements only.
356 function outputWiki() {
357 global $wgContLang, $IP;
// Table header: fixed columns followed by one column per selected check.
// NOTE(review): this lists all of $this->checks, while the loop below tests for
// non-message checks - confirm that the row columns still line up with the header.
359 $rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks );
360 foreach ( $this->results as $code => $results ) {
// Section heading of the detail text of this language
361 $detailTextForLang = "==$code==\n";
364 $detailTextForLangChecks = array();
365 foreach ( $results as $check => $messages ) {
// Special handling for non-message checks; the branch body is not visible here
366 if( in_array( $check, $this->nonMessageChecks() ) ) {
369 $count = count( $messages );
// Collect the formatted keys of this check
372 $messageDetails = array();
373 foreach ( $messages as $key => $details ) {
374 $displayKey = $this->formatKey( $key, $code );
375 $messageDetails[] = $displayKey;
// One sub-section per check listing its keys; the table cell is a bold link to the
// "#code-check" anchor showing the count
377 $detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
378 $numbers[] = "'''[[#$code-$check|$count]]'''";
// Add the details of this language only when at least one check produced a sub-section
385 if ( count( $detailTextForLangChecks ) ) {
386 $detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
390 # Don't list languages without problems
// Table row: language name, code, problem count, then the per-check cells
393 $language = $wgContLang->getLanguageName( $code );
394 $rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
// Rows are separated by the wiki table row marker
397 $tableRows = implode( "\n|-\n", $rows );
// Version string shown at the top of the report; the remaining lines are output text
399 $version = SpecialVersion::getVersion( $IP );
401 '''Check results are for:''' <code>$version</code>
404 {| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
/**
 * Check whether the checks found nothing at all, in any language.
 * @return True if no check of any language has results, false if at least
 *   one check has.
 */
protected function isEmpty() {
	foreach ( $this->results as $results ) {
		if ( !is_array( $results ) ) {
			// A language without a result array has nothing to report; skipping it
			// avoids a foreach warning on a non-array value
			continue;
		}
		foreach ( $results as $messages ) {
			if ( !empty( $messages ) ) {
				return false;
			}
		}
	}

	return true;
}
429 class CheckExtensionsCLI extends CheckLanguageCLI {
/**
 * Constructor: copy the script options into the object state and collect the
 * extension message groups to check.
 *
 * @param $options Options for script.
 * @param $extension The extension name (or names): 'all', 'wikimedia', or a
 *   comma separated list of extension names.
 */
public function __construct( Array $options, $extension ) {
	if ( isset( $options['help'] ) ) {
		// NOTE(review): the statements of this branch are missing from the reviewed
		// listing; printing the help text and stopping is assumed - confirm the exit status.
		echo $this->help();
		exit( 1 );
	}

	if ( isset( $options['lang'] ) ) {
		$this->code = $options['lang'];
	} else {
		// Fall back to the language the wiki is configured with
		global $wgLanguageCode;
		$this->code = $wgLanguageCode;
	}

	if ( isset( $options['level'] ) ) {
		// Store an integer, so that strict comparisons against 0 work for values
		// that arrive as strings
		$this->level = (int)$options['level'];
	}

	$this->doLinks = isset( $options['links'] );

	if ( isset( $options['wikilang'] ) ) {
		$this->wikiCode = $options['wikilang'];
	}

	// A whitelist replaces the default checks, a blacklist subtracts from them
	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} elseif ( isset( $options['blacklist'] ) ) {
		$this->checks = array_diff(
			$this->defaultChecks(),
			explode( ',', $options['blacklist'] )
		);
	} else {
		$this->checks = $this->defaultChecks();
	}

	if ( isset( $options['output'] ) ) {
		$this->output = $options['output'];
	}

	# Some additional checks not enabled by default. Only add the check when it
	# is not selected already, otherwise it would show up twice in the output.
	if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
		$this->checks[] = 'duplicate';
	}

	$this->extensions = array();
	// Called before any group is looked up below.
	// NOTE(review): presumably this registers the extension groups - confirm.
	$premadeGroups = new PremadeMediawikiExtensionGroups();
	$premadeGroups->addAll();

	if ( $extension == 'all' ) {
		// Every non-meta group whose id starts with 'ext-'
		foreach ( MessageGroups::singleton()->getGroups() as $group ) {
			if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
				$this->extensions[] = new extensionLanguages( $group );
			}
		}
	} elseif ( $extension == 'wikimedia' ) {
		// The groups listed by the 'ext-0-wikimedia' group
		$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
		foreach ( $wikimedia->wmfextensions() as $groupId ) {
			$group = MessageGroups::getGroup( $groupId );
			$this->extensions[] = new extensionLanguages( $group );
		}
	} else {
		// A comma separated list of extension names
		foreach ( explode( ',', $extension ) as $extensionName ) {
			$group = MessageGroups::getGroup( 'ext-' . $extensionName );
			// NOTE(review): the exact test is missing from the reviewed listing; a
			// truthiness check on the looked-up group is assumed.
			if ( $group ) {
				$this->extensions[] = new extensionLanguages( $group );
			} else {
				print "No such extension $extensionName.\n";
			}
		}
	}
}
/**
 * Get the default checks for extensions: the message checks only.
 * @return A list of the default checks.
 */
protected function defaultChecks() {
	// Checks comparing the translation with the source messages
	$translationChecks = array(
		'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
	);
	// Checks on the text of the translation itself
	$textChecks = array(
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
	);

	// Both are plain lists, so merging keeps the order and renumbers from 0
	return array_merge( $translationChecks, $textChecks );
}
522 * @return The help string.
// Usage text of the extension checker. Everything after the signature line is the help
// text itself, listing the command line parameters, the check codes and the display
// levels; any edit to those lines changes what the user is shown.
// NOTE(review): the lines that open and close the text literal are not part of this
// listing - confirm the delimiters before touching the block.
// NOTE(review): the text describes 'duplicate' as off by default, while defaultChecks()
// of this class lists it - confirm which one is intended.
524 protected function help() {
526 Run this script to check the status of a specific language in extensions, or all of them.
527 Command line settings are in form --parameter[=value], except for the first one.
529 * First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
530 * lang: Language code (default: the installation default language).
531 * help: Show this help.
532 * level: Show the following display level (default: 2).
533 * links: Link the message values (default off).
534 * wikilang: For the links, what is the content language of the wiki to display the output in (default en).
535 * whitelist: Do only the following checks (form: code,code).
536 * blacklist: Do not perform the following checks (form: code,code).
537 * duplicate: Additionally check for messages which are translated the same to English (default off).
538 Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
539 * untranslated: Messages which are required to translate, but are not translated.
540 * duplicate: Messages which translation equal to fallback
541 * obsolete: Messages which are untranslatable, but translated.
542 * variables: Messages without variables which should be used.
543 * empty: Empty messages.
544 * whitespace: Messages which have trailing whitespace.
545 * xhtml: Messages which are not well-formed XHTML (checks only few common errors).
546 * chars: Messages with hidden characters.
547 * links: Messages which contains broken links to pages (does not find all).
548 * unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
549 Display levels (default: 2):
550 * 0: Skip the checks (useful for checking syntax).
551 * 1: Show only the stub headers and number of wrong messages, without list of messages.
552 * 2: Show only the headers and the message keys, without the message values.
553 * 3: Show both the headers and the complete messages, with both keys and values.
/**
 * Execute the script. Only the checks are started here: checkLanguage() of
 * this class prints the results of each extension itself.
 */
public function execute() {
	// NOTE(review): the body is missing from the reviewed listing; a single call that
	// starts the checks is assumed - confirm.
	$this->doChecks();
}
/**
 * Check a language and show the results, one extension after the other.
 * Extensions without any result are not mentioned in the output.
 * @param $code The language code.
 */
protected function checkLanguage( $code ) {
	foreach ( $this->extensions as $extension ) {
		// Run the inherited checks against this extension only
		$this->L = $extension;
		$this->results = array();
		$this->results[$code] = parent::checkLanguage( $code );

		if ( $this->isEmpty() ) {
			continue;
		}

		echo $extension->name() . ":\n";

		if ( $this->level > 0 ) {
			// NOTE(review): the branch labels are missing from the reviewed listing;
			// 'plain' and 'wiki' are taken from the default of $output and the two
			// output methods - confirm. Loose comparison on purpose, to match what
			// a switch statement does.
			if ( $this->output == 'plain' ) {
				$this->outputText();
			} elseif ( $this->output == 'wiki' ) {
				$this->outputWiki();
			} else {
				throw new MWException( "Invalid output type $this->output" );
			}
		}

		// NOTE(review): a trailing separator line after each extension is assumed - confirm.
		echo "\n";
	}
}
597 # Blacklist some checks for some languages
598 $checkBlacklist = array(
599 #'code' => array( 'check1', 'check2' ... )
600 'gan' => array( 'plural' ),
601 'gn' => array( 'plural' ),
602 'hak' => array( 'plural' ),
603 'hu' => array( 'plural' ),
604 'ja' => array( 'plural' ), // Does not use plural
605 'ka' => array( 'plural' ),
606 'kk-arab' => array( 'plural' ),
607 'kk-cyrl' => array( 'plural' ),
608 'kk-latn' => array( 'plural' ),
609 'ko' => array( 'plural' ),
610 'mn' => array( 'plural' ),
611 'ms' => array( 'plural' ),
612 'my' => array( 'chars' ), // Uses a lot zwnj
613 'sah' => array( 'plural' ),
614 'sq' => array( 'plural' ),
615 'tet' => array( 'plural' ),
616 'th' => array( 'plural' ),
617 'wuu' => array( 'plural' ),
618 'xmf' => array( 'plural' ),
619 'yue' => array( 'plural' ),
620 'zh' => array( 'plural' ),
621 'zh-classical' => array( 'plural' ),
622 'zh-cn' => array( 'plural' ),
623 'zh-hans' => array( 'plural' ),
624 'zh-hant' => array( 'plural' ),
625 'zh-hk' => array( 'plural' ),
626 'zh-sg' => array( 'plural' ),
627 'zh-tw' => array( 'plural' ),
628 'zh-yue' => array( 'plural' ),