3 * Helper class for checkLanguage.php script.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup MaintenanceLanguage
25 * @ingroup MaintenanceLanguage
27 class CheckLanguageCLI {
// Command-line front end for the localisation checks: the constructor reads the
// script options, the other methods run the selected checks and format their results.
// NOTE(review): methods of this class read $this->level and $this->L, but no
// declaration of either property is visible in this listing - confirm they exist.
// Language code to check: the 'lang' option, else $wgLanguageCode.
28 protected $code = null;
// Whether message keys are rendered as wiki links (set when the 'links' option is present).
30 protected $doLinks = false;
// Text inserted in front of "MediaWiki:" in generated links ('prefix' option).
31 protected $linksPrefix = '';
// Language code for which generated links carry no "/code" suffix ('wikilang' option).
32 protected $wikiCode = 'en';
// Whether every language is checked instead of only $code (set when the 'all' option is present).
33 protected $checkAll = false;
// Output type tested by execute(); any unrecognised value makes it throw.
34 protected $output = 'plain';
// Names of the checks to run.
35 protected $checks = [];
// Check results, indexed as [language code][check name][key].
38 protected $results = [];
// False when the 'noexif' option is present; passed to the Languages constructor.
40 private $includeExif = false;
43 * @param array $options Options for script.
45 public function __construct( array $options ) {
// Copies the recognised script options into the object, selects the checks to run
// and creates the Languages helper stored in $this->L.
// NOTE(review): the statements executed for the 'help' option are not visible in this listing.
46 if ( isset( $options['help'] ) ) {
51 if ( isset( $options['lang'] ) ) {
52 $this->code = $options['lang'];
54 global $wgLanguageCode;
55 $this->code = $wgLanguageCode;
// The level is stored exactly as supplied (no cast). NOTE(review): command-line values are
// presumably strings - verify this against the strict "=== 0" test in checkLanguage().
58 if ( isset( $options['level'] ) ) {
59 $this->level = $options['level'];
62 $this->doLinks = isset( $options['links'] );
63 $this->includeExif = !isset( $options['noexif'] );
64 $this->checkAll = isset( $options['all'] );
66 if ( isset( $options['prefix'] ) ) {
67 $this->linksPrefix = $options['prefix'];
70 if ( isset( $options['wikilang'] ) ) {
71 $this->wikiCode = $options['wikilang'];
// Check selection precedence: 'whitelist' (comma-separated names) wins; otherwise 'blacklist'
// removes names from the easy or default set; otherwise 'easy'; otherwise the default set.
74 if ( isset( $options['whitelist'] ) ) {
75 $this->checks = explode( ',', $options['whitelist'] );
76 } elseif ( isset( $options['blacklist'] ) ) {
// array_diff() preserves keys, so the resulting list may have gaps in its numeric indexes.
77 $this->checks = array_diff(
78 isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
79 explode( ',', $options['blacklist'] )
81 } elseif ( isset( $options['easy'] ) ) {
82 $this->checks = $this->easyChecks();
84 $this->checks = $this->defaultChecks();
87 if ( isset( $options['output'] ) ) {
88 $this->output = $options['output'];
91 $this->L = new Languages( $this->includeExif );
/**
 * List the checks that are used when no other selection is requested.
 * @return array Names of the default checks.
 */
protected function defaultChecks() {
	$names = [
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	];

	return $names;
}
/**
 * List the checks that inspect something other than messages
 * (namespace names, magic words, special page aliases).
 * @return array Names of the non-message checks.
 */
protected function nonMessageChecks() {
	$names = [
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	];

	return $names;
}
/**
 * List the checks whose findings can be handled without speaking the language.
 * @return array Names of the easy checks.
 */
protected function easyChecks() {
	$names = [
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
		'magic-old',
		'magic-over',
		'magic-case',
		'special-old',
	];

	return $names;
}
/**
 * Map every known check to the method implementing it.
 * The method names are invoked on the object stored in $this->L.
 * @return array Check name => method name.
 */
protected function getChecks() {
	$methodsByCheck = [
		'untranslated' => 'getUntranslatedMessages',
		'duplicate' => 'getDuplicateMessages',
		'obsolete' => 'getObsoleteMessages',
		'variables' => 'getMessagesWithMismatchVariables',
		'plural' => 'getMessagesWithoutPlural',
		'empty' => 'getEmptyMessages',
		'whitespace' => 'getMessagesWithWhitespace',
		'xhtml' => 'getNonXHTMLMessages',
		'chars' => 'getMessagesWithWrongChars',
		'links' => 'getMessagesWithDubiousLinks',
		'unbalanced' => 'getMessagesWithUnbalanced',
		'namespace' => 'getUntranslatedNamespaces',
		'projecttalk' => 'getProblematicProjectTalks',
		'magic' => 'getUntranslatedMagicWords',
		'magic-old' => 'getObsoleteMagicWords',
		'magic-over' => 'getOverridingMagicWords',
		'magic-case' => 'getCaseMismatchMagicWords',
		'special' => 'getUntraslatedSpecialPages',
		'special-old' => 'getObsoleteSpecialPages',
	];

	return $methodsByCheck;
}
/**
 * Describe how the total number of items is obtained for each non-message check.
 * @return array Check name => two-element array holding the method name that
 *   returns the items to count and the language code to pass to it. A null
 *   language code stands for the language being checked; a null entry means
 *   that no total is defined for the check.
 */
protected function getTotalCount() {
	$totals = [
		'namespace' => [ 'getNamespaceNames', 'en' ],
		'projecttalk' => null,
		'magic' => [ 'getMagicWords', 'en' ],
		'magic-old' => [ 'getMagicWords', null ],
		'magic-over' => [ 'getMagicWords', null ],
		'magic-case' => [ 'getMagicWords', null ],
		'special' => [ 'getSpecialPageAliases', 'en' ],
		'special-old' => [ 'getSpecialPageAliases', null ],
	];

	return $totals;
}
/**
 * Get the header template of every check.
 * In each template $1 is the number of findings, $2 the total number of
 * items and $3 the language code.
 * @return array Check name => description template.
 */
protected function getDescriptions() {
	$templates = [
		'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
		'duplicate' => '$1 message(s) of $2 are translated the same in en and $3:',
		'obsolete' =>
			'$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
		'variables' => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
		'plural' => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
		'empty' => '$1 message(s) of $2 in $3 are empty or -:',
		'whitespace' => '$1 message(s) of $2 in $3 have trailing whitespace:',
		'xhtml' => '$1 message(s) of $2 in $3 contain illegal XHTML:',
		'chars' =>
			'$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
		'links' => '$1 message(s) of $2 in $3 have problematic link(s):',
		'unbalanced' => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
		'namespace' => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
		'projecttalk' =>
			'$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
		'magic' => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
		'magic-old' => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
		'magic-over' => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
		'magic-case' =>
			'$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
		'special' => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
		'special-old' => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
	];

	return $templates;
}
210 * @return string The help string.
212 protected function help() {
// Returns the usage text of the script: the accepted --parameters, the display
// levels and the list of check codes.
// NOTE(review): several entries of the text below end mid-sentence, so some of its
// lines appear to be missing from this listing - confirm against the complete file.
214 Run this script to check a specific language file, or all of them.
215 Command line settings are in form --parameter[=value].
217 --help: Show this help.
218 --lang: Language code (default: the installation default language).
219 --all: Check all customized languages.
220 --level: Show the following display level (default: 2):
221 * 0: Skip the checks (useful for checking syntax).
222 * 1: Show only the stub headers and number of wrong messages, without
224 * 2: Show only the headers and the message keys, without the message
226 * 3: Show both the headers and the complete messages, with both keys and
228 --links: Link the message values (default off).
229 --prefix: prefix to add to links.
230 --wikilang: For the links, what is the content language of the wiki to
231 display the output in (default en).
232 --noexif: Do not check for Exif messages (a bit hard and boring to
233 translate), if you know what they are currently not translated and want
234 to focus on other problems (default off).
235 --whitelist: Do only the following checks (form: code,code).
236 --blacklist: Do not do the following checks (form: code,code).
237 --easy: Do only the easy checks, which can be treated by non-speakers of
240 Check codes (ideally, all of them should result 0; all the checks are executed
241 by default (except language-specific check blacklists in checkLanguage.inc):
242 * untranslated: Messages which are required to translate, but are not
244 * duplicate: Messages which translation equal to fallback.
245 * obsolete: Messages which are untranslatable or do not exist, but are
247 * variables: Messages without variables which should be used, or with
248 variables which should not be used.
249 * empty: Empty messages and messages that contain only -.
250 * whitespace: Messages which have trailing whitespace.
251 * xhtml: Messages which are not well-formed XHTML (checks only few common
253 * chars: Messages with hidden characters.
254 * links: Messages which contains broken links to pages (does not find all).
255 * unbalanced: Messages which contains unequal numbers of opening {[ and
257 * namespace: Namespace names that were not translated.
258 * projecttalk: Namespace names and aliases where the project talk does not
260 * magic: Magic words that were not translated.
261 * magic-old: Magic words which do not exist.
262 * magic-over: Magic words that override the original English word.
263 * magic-case: Magic words whose translation changes the case-sensitivity of
264 the original English word.
265 * special: Special page names that were not translated.
266 * special-old: Special page names which do not exist.
272 * Execute the script.
274 public function execute() {
// Output is produced only when the display level is above 0; the output type stored in
// $this->output selects the format, and an unrecognised type raises an MWException.
// NOTE(review): the statement(s) before the level test and the case labels of the
// switch are not visible in this listing.
276 if ( $this->level > 0 ) {
277 switch ( $this->output ) {
285 throw new MWException( "Invalid output type $this->output" );
291 * Execute the checks.
293 protected function doChecks() {
// Fills $this->results with the output of checkLanguage() for every language (when
// $this->checkAll is set) or for $this->code only, then drops blacklisted findings.
// NOTE(review): three lines between the next statement and the first "if" are not
// visible in this listing - confirm whether $this->results is reset there.
// Codes that are never checked.
294 $ignoredCodes = [ 'en', 'enRTL' ];
298 if ( $this->checkAll ) {
299 foreach ( $this->L->getLanguages() as $language ) {
300 if ( !in_array( $language, $ignoredCodes ) ) {
301 $this->results[$language] = $this->checkLanguage( $language );
// Single-language mode: asking for one of the ignored codes is an error.
305 if ( in_array( $this->code, $ignoredCodes ) ) {
306 throw new MWException( "Cannot check code $this->code." );
308 $this->results[$this->code] = $this->checkLanguage( $this->code );
// Iterate over a copy so that entries can be removed from $this->results while looping.
312 $results = $this->results;
313 foreach ( $results as $code => $checks ) {
314 foreach ( $checks as $check => $messages ) {
315 foreach ( $messages as $key => $details ) {
// Per-key filtering: a finding is removed when the blacklist matches its check, language and key.
316 if ( $this->isCheckBlacklisted( $check, $code, $key ) ) {
317 unset( $this->results[$code][$check][$key] );
/**
 * Get the check blacklist.
 *
 * The list is read from the global $checkBlacklist, handed to the
 * LocalisationChecksBlacklist hook (which receives it by reference) and
 * then cached in a static variable, so the hook runs only once.
 *
 * @return array The list of checks which should not be executed.
 */
protected function getCheckBlacklist() {
	static $blacklist = null;

	if ( $blacklist !== null ) {
		return $blacklist;
	}

	// @codingStandardsIgnoreStart Ignore that globals should have a "wg" prefix.
	global $checkBlacklist;
	// @codingStandardsIgnoreEnd

	// Start from an empty list when the global was never populated. Otherwise
	// null would be returned, callers could not iterate over the result, and
	// the cache test above would never succeed.
	$blacklist = is_array( $checkBlacklist ) ? $checkBlacklist : [];

	Hooks::run( 'LocalisationChecksBlacklist', [ &$blacklist ] );

	return $blacklist;
}
/**
 * Tell whether the blacklist covers a check for a language, and optionally
 * for one message of that language.
 *
 * A blacklist item applies when each of its 'check', 'code' and 'message'
 * keys is either absent or matches the corresponding argument. Items that
 * carry a 'message' key never apply to a whole-language query.
 *
 * @param string $check Check name
 * @param string $code Language code
 * @param string|bool $message Message name, or False for a whole language
 * @return bool Whether the check is blacklisted
 */
protected function isCheckBlacklisted( $check, $code, $message ) {
	foreach ( $this->getCheckBlacklist() as $rule ) {
		// The criteria are tested in the order check, code, message, and
		// evaluation stops at the first one that does not match.
		$applies =
			( !isset( $rule['check'] ) || $check === $rule['check'] ) &&
			( !isset( $rule['code'] ) || in_array( $code, $rule['code'] ) ) &&
			( !isset( $rule['message'] ) ||
				( $message !== false && in_array( $message, $rule['message'] ) ) );

		if ( $applies ) {
			return true;
		}
	}

	return false;
}
/**
 * Run the selected checks for one language.
 *
 * @param string $code The language code.
 * @throws MWException If a selected check is unknown or cannot be called.
 * @return array The results, keyed by check name. Empty at display level 0,
 *   where the messages are only loaded.
 */
protected function checkLanguage( $code ) {
	$results = [];

	# Syntax check only. The level is copied verbatim from the options array,
	# so compare numerically: it may be the string "0" rather than the integer.
	if ( (int)$this->level === 0 ) {
		$this->L->getMessages( $code );

		return $results;
	}

	# Perform the checks
	$checkFunctions = $this->getChecks();
	foreach ( $this->checks as $check ) {
		if ( $this->isCheckBlacklisted( $check, $code, false ) ) {
			$results[$check] = [];
			continue;
		}

		// Validate the name before using it as an index: check names may come
		// straight from the 'whitelist' option.
		if ( !isset( $checkFunctions[$check] ) ) {
			throw new MWException( "Unknown check $check." );
		}

		$callback = [ $this->L, $checkFunctions[$check] ];
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Unknown check $check." );
		}
		$results[$check] = call_user_func( $callback, $code );
	}

	return $results;
}
/**
 * Turn a message key into its display form.
 *
 * Without the links option the key is returned untouched. With it, the key
 * becomes a wiki link to its MediaWiki: page, carrying a "/code" suffix
 * unless the language is the wiki's own content language.
 *
 * @param string $key The message key.
 * @param string $code The language code.
 * @return string The formatted message key.
 */
protected function formatKey( $key, $code ) {
	if ( !$this->doLinks ) {
		return $key;
	}

	$target = 'MediaWiki:' . ucfirst( $key );
	if ( $code != $this->wikiCode ) {
		$target .= "/$code";
	}

	return "[[{$this->linksPrefix}{$target}|$key]]";
}
/**
 * Output the checks results as plain text.
 *
 * For every checked language and every check, a header built from the check
 * description is printed. At display level 1 the findings are hidden; at
 * level 2, or when a finding has an empty value, only the key is listed;
 * otherwise the key is listed together with its value.
 */
protected function outputText() {
	// None of these lookups depends on the language or the check: fetch them once.
	$nonMessageChecks = $this->nonMessageChecks();
	$totalCounts = $this->getTotalCount();
	$descriptions = $this->getDescriptions();
	$search = [ '$1', '$2', '$3' ];

	foreach ( $this->results as $code => $results ) {
		$translated = $this->L->getMessages( $code );
		$translated = count( $translated['translated'] );
		foreach ( $results as $check => $messages ) {
			$count = count( $messages );
			$isNonMessageCheck = in_array( $check, $nonMessageChecks );

			if ( $check == 'untranslated' ) {
				$translatable = $this->L->getGeneralMessages();
				$total = count( $translatable['translatable'] );
			} elseif ( $isNonMessageCheck ) {
				$totalCount = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
				if ( $totalCount === null ) {
					// No total is defined for this check (getTotalCount() maps
					// 'projecttalk' to null). Do not build a callback from it.
					$total = 0;
				} else {
					$callback = [ $this->L, $totalCount[0] ];
					// A null language code means "the language being checked".
					$callCode = $totalCount[1] ? $totalCount[1] : $code;
					$total = count( call_user_func( $callback, $callCode ) );
				}
			} else {
				$total = $translated;
			}

			$replace = [ $count, $total, $code ];
			echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
			if ( $this->level == 1 ) {
				echo "[messages are hidden]\n";
				continue;
			}

			foreach ( $messages as $key => $value ) {
				if ( !$isNonMessageCheck ) {
					$key = $this->formatKey( $key, $code );
				}
				if ( $this->level == 2 || empty( $value ) ) {
					// NOTE(review): the key-only line was not visible in the reviewed
					// listing; this format mirrors the key/value line below - confirm.
					echo "* $key\n";
				} else {
					echo "* $key: '$value'\n";
				}
			}
		}
	}
}
475 * Output the checks results as wiki text.
477 function outputWiki() {
// Builds a wikitable with one row per language (language name, code, total number of
// problems, then one cell per message check) followed by per-language detail sections
// listing the offending keys. Non-message checks are left out of the table header.
// NOTE(review): no visibility keyword is given, so this method is public, unlike outputText().
// NOTE(review): the initialisation of $detailText, $numbers and $problems, the branch
// bodies and parts of the final output are not visible in this listing.
479 $rows[] = '! Language !! Code !! Total !! ' .
480 implode( ' !! ', array_diff( $this->checks, $this->nonMessageChecks() ) );
481 foreach ( $this->results as $code => $results ) {
482 $detailTextForLang = "==$code==\n";
485 $detailTextForLangChecks = [];
486 foreach ( $results as $check => $messages ) {
487 if ( in_array( $check, $this->nonMessageChecks() ) ) {
490 $count = count( $messages );
493 $messageDetails = [];
494 foreach ( $messages as $key => $details ) {
495 $displayKey = $this->formatKey( $key, $code );
496 $messageDetails[] = $displayKey;
// The section title "$code-$check" is the anchor targeted by the count link built on the next line.
498 $detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
499 $numbers[] = "'''[[#$code-$check|$count]]'''";
// The language heading is emitted only when at least one check produced a detail section.
505 if ( count( $detailTextForLangChecks ) ) {
506 $detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
510 # Don't list languages without problems
513 $language = Language::fetchLanguageName( $code );
514 $rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
// Rows are joined with the wikitable row separator.
517 $tableRows = implode( "\n|-\n", $rows );
519 $version = SpecialVersion::getVersion( 'nodb' );
520 // @codingStandardsIgnoreStart Long line.
522 '''Check results are for:''' <code>$version</code>
525 {| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
532 // @codingStandardsIgnoreEnd
/**
 * Check whether the stored results contain no finding at all, in any language.
 * @return bool True if every check of every language has an empty result
 *   list, false as soon as one of them is not empty.
 */
protected function isEmpty() {
	foreach ( $this->results as $results ) {
		foreach ( $results as $messages ) {
			if ( !empty( $messages ) ) {
				return false;
			}
		}
	}

	return true;
}
553 * @ingroup MaintenanceLanguage
555 class CheckExtensionsCLI extends CheckLanguageCLI {
// Variant of CheckLanguageCLI that runs the checks against extension message groups.
// NOTE(review): $this->extensions is assigned below but no declaration of that property
// is visible in this listing.
559 * @param array $options Options for script.
560 * @param string $extension The extension name (or names).
562 public function __construct( array $options, $extension ) {
// Repeats most of the option handling of the parent constructor; no call to
// parent::__construct() is visible. The 'prefix', 'noexif' and 'all' options are not read
// here, so the corresponding properties keep their declared defaults.
// NOTE(review): the statements executed for the 'help' option are not visible in this listing.
563 if ( isset( $options['help'] ) ) {
568 if ( isset( $options['lang'] ) ) {
569 $this->code = $options['lang'];
571 global $wgLanguageCode;
572 $this->code = $wgLanguageCode;
575 if ( isset( $options['level'] ) ) {
576 $this->level = $options['level'];
579 $this->doLinks = isset( $options['links'] );
581 if ( isset( $options['wikilang'] ) ) {
582 $this->wikiCode = $options['wikilang'];
// Same precedence as in the parent class: whitelist, then blacklist, then easy, then defaults.
585 if ( isset( $options['whitelist'] ) ) {
586 $this->checks = explode( ',', $options['whitelist'] );
587 } elseif ( isset( $options['blacklist'] ) ) {
588 $this->checks = array_diff(
589 isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
590 explode( ',', $options['blacklist'] )
592 } elseif ( isset( $options['easy'] ) ) {
593 $this->checks = $this->easyChecks();
595 $this->checks = $this->defaultChecks();
598 if ( isset( $options['output'] ) ) {
599 $this->output = $options['output'];
// NOTE(review): 'duplicate' is already part of this class's default and easy sets, so the
// option below can add a second 'duplicate' entry to $this->checks - confirm this is intended.
602 # Some additional checks not enabled by default
603 if ( isset( $options['duplicate'] ) ) {
604 $this->checks[] = 'duplicate';
// Build one ExtensionLanguages object per selected message group.
607 $this->extensions = [];
608 $extensions = new PremadeMediawikiExtensionGroups();
609 $extensions->addAll();
// 'all': every non-meta group whose id starts with "ext-".
610 if ( $extension == 'all' ) {
611 foreach ( MessageGroups::singleton()->getGroups() as $group ) {
612 if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
613 $this->extensions[] = new ExtensionLanguages( $group );
// 'wikimedia': the groups returned by wmfextensions() of the "ext-0-wikimedia" group.
616 } elseif ( $extension == 'wikimedia' ) {
617 $wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
618 foreach ( $wikimedia->wmfextensions() as $extension ) {
619 $group = MessageGroups::getGroup( $extension );
620 $this->extensions[] = new ExtensionLanguages( $group );
// 'flaggedrevs': every non-meta group whose id starts with "ext-flaggedrevs-".
622 } elseif ( $extension == 'flaggedrevs' ) {
623 foreach ( MessageGroups::singleton()->getGroups() as $group ) {
624 if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
625 $this->extensions[] = new ExtensionLanguages( $group );
// Anything else: a comma-separated list of extension names, each looked up as "ext-<name>".
629 $extensions = explode( ',', $extension );
630 foreach ( $extensions as $extension ) {
631 $group = MessageGroups::getGroup( 'ext-' . $extension );
633 $extension = new ExtensionLanguages( $group );
634 $this->extensions[] = $extension;
636 print "No such extension $extension.\n";
/**
 * List the checks that are used for extensions when no other selection is requested.
 * @return array Names of the default checks.
 */
protected function defaultChecks() {
	$names = [
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
	];

	return $names;
}
654 * Get the checks which check other things than messages.
655 * @return array A list of the non-message checks.
657 protected function nonMessageChecks() {
// Overrides CheckLanguageCLI::nonMessageChecks() for extension checks.
// NOTE(review): the body of this method is not visible in this listing.
/**
 * List the extension checks whose findings can be handled without speaking the language.
 * @return array Names of the easy checks.
 */
protected function easyChecks() {
	$names = [
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
	];

	return $names;
}
673 * @return string The help string.
675 protected function help() {
// Returns the usage text of the extension variant of the script: the mandatory extension
// argument, the accepted parameters, the check codes and the display levels.
// NOTE(review): several entries of the text below end mid-sentence, so some of its
// lines appear to be missing from this listing - confirm against the complete file.
677 Run this script to check the status of a specific language in extensions, or
678 all of them. Command line settings are in form --parameter[=value], except for
681 * First parameter (mandatory): Extension name, multiple extension names
682 (separated by commas), "all" for all the extensions, "wikimedia" for
683 extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs
685 * lang: Language code (default: the installation default language).
686 * help: Show this help.
687 * level: Show the following display level (default: 2).
688 * links: Link the message values (default off).
689 * wikilang: For the links, what is the content language of the wiki to
690 display the output in (default en).
691 * whitelist: Do only the following checks (form: code,code).
692 * blacklist: Do not perform the following checks (form: code,code).
693 * easy: Do only the easy checks, which can be treated by non-speakers of
696 Check codes (ideally, all of them should result 0; all the checks are executed
697 by default (except language-specific check blacklists in checkLanguage.inc):
698 * untranslated: Messages which are required to translate, but are not
700 * duplicate: Messages which translation equal to fallback.
701 * obsolete: Messages which are untranslatable, but translated.
702 * variables: Messages without variables which should be used, or with
703 variables which should not be used.
704 * empty: Empty messages.
705 * whitespace: Messages which have trailing whitespace.
706 * xhtml: Messages which are not well-formed XHTML (checks only few common
708 * chars: Messages with hidden characters.
709 * links: Messages which contains broken links to pages (does not find all).
710 * unbalanced: Messages which contains unequal numbers of opening {[ and
713 Display levels (default: 2):
714 * 0: Skip the checks (useful for checking syntax).
715 * 1: Show only the stub headers and number of wrong messages, without list
717 * 2: Show only the headers and the message keys, without the message
719 * 3: Show both the headers and the complete messages, with both keys and
726 * Execute the script.
728 public function execute() {
// Overrides CheckLanguageCLI::execute().
// NOTE(review): the body of this method is not visible in this listing.
733 * Check a language and show the results.
734 * @param string $code The language code.
735 * @throws MWException
737 protected function checkLanguage( $code ) {
// Runs the parent checks once per selected extension, with $this->L pointing at that
// extension, and reports each extension separately.
// NOTE(review): no return statement is visible here, yet doChecks() stores the value
// returned by checkLanguage() in $this->results - confirm how the two interact.
738 foreach ( $this->extensions as $extension ) {
739 $this->L = $extension;
741 $this->results[$code] = parent::checkLanguage( $code );
// The extension name is printed only when at least one check found something.
743 if ( !$this->isEmpty() ) {
744 echo $extension->name() . ":\n";
// As in the parent class, results are output only above display level 0 and an
// unrecognised output type raises an MWException.
746 if ( $this->level > 0 ) {
747 switch ( $this->output ) {
755 throw new MWException( "Invalid output type $this->output" );
765 // Blacklist some checks for some languages or some messages
766 // Possible keys of the sub arrays are: 'check', 'code' and 'message'.
770 'code' => [ 'az', 'bo', 'cdo', 'dz', 'id', 'fa', 'gan', 'gan-hans',
771 'gan-hant', 'gn', 'hak', 'hu', 'ja', 'jv', 'ka', 'kk-arab',
772 'kk-cyrl', 'kk-latn', 'km', 'kn', 'ko', 'lzh', 'mn', 'ms',
773 'my', 'sah', 'sq', 'tet', 'th', 'to', 'tr', 'vi', 'wuu', 'xmf',
774 'yo', 'yue', 'zh', 'zh-classical', 'zh-cn', 'zh-hans',
775 'zh-hant', 'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue'