From d2ea5588477ea0200ea092505c01c55b801ef170 Mon Sep 17 00:00:00 2001 From: Rotem Liss Date: Sat, 5 Aug 2006 18:13:33 +0000 Subject: [PATCH] =?utf8?q?Adding=20a=20check=20for=20messages=20which=20in?= =?utf8?q?clude=20hidden=20chars=20(mostly=20written=20by=20Niklas=20Laxst?= =?utf8?q?r=C3=B6m).?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- maintenance/checkLanguage.php | 5 +++++ maintenance/languages.inc | 37 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/maintenance/checkLanguage.php b/maintenance/checkLanguage.php index 1681ff02c2..db299419f2 100644 --- a/maintenance/checkLanguage.php +++ b/maintenance/checkLanguage.php @@ -75,4 +75,9 @@ $nonXHTMLMessages = $wgLanguages->getNonXHTMLMessages( $code ); $nonXHTMLMessagesNumber = count( $nonXHTMLMessages ); $wgLanguages->outputMessagesList( $nonXHTMLMessages, "\n$nonXHTMLMessagesNumber messages of $localMessagesNumber in $code are not well-formed XHTML:", $wgHideMessages ); +# Messages with wrong characters +$messagesWithWrongChars = $wgLanguages->getMessagesWithWrongChars( $code ); +$messagesWithWrongCharsNumber = count( $messagesWithWrongChars ); +$wgLanguages->outputMessagesList( $messagesWithWrongChars, "\n$messagesWithWrongCharsNumber messages of $localMessagesNumber in $code include hidden chars which should not be used in the messages:", $wgHideMessages ); + ?> diff --git a/maintenance/languages.inc b/maintenance/languages.inc index f8bee1c787..0f7ef75713 100644 --- a/maintenance/languages.inc +++ b/maintenance/languages.inc @@ -263,6 +263,43 @@ class languages { return $nonXHTMLMessages; } + /** + * Get the messages which include wrong characters. + * + * @param $code The language code. + * + * @return The messages which include wrong characters in this language. 
+	 */
+	public function getMessagesWithWrongChars( $code ) {
+		$this->loadMessages( 'en' ); // English is the reference: only keys that exist there are checked below
+		$this->loadMessages( $code );
+		$wrongChars = array( // Visible placeholder => UTF-8 byte sequence of the hidden character
+			'[LRM]' => "\xE2\x80\x8E", // U+200E left-to-right mark
+			'[RLM]' => "\xE2\x80\x8F", // U+200F right-to-left mark
+			'[LRE]' => "\xE2\x80\xAA", // U+202A left-to-right embedding
+			'[RLE]' => "\xE2\x80\xAB", // U+202B right-to-left embedding
+			'[POP]' => "\xE2\x80\xAC", // U+202C pop directional formatting
+			'[LRO]' => "\xE2\x80\xAD", // U+202D left-to-right override
+			'[RLO]' => "\xE2\x80\xAE", // U+202E right-to-left override (was \xAB, a duplicate of RLE, so RLO was never detected)
+			'[ZWSP]'=> "\xE2\x80\x8B", // U+200B zero width space
+			'[NBSP]'=> "\xC2\xA0", // U+00A0 no-break space
+			'[WJ]' => "\xE2\x81\xA0", // U+2060 word joiner
+			'[BOM]' => "\xEF\xBB\xBF", // U+FEFF byte order mark
+			'[FFFD]'=> "\xEF\xBF\xBD", // U+FFFD replacement character
+		);
+		$wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . ')/sDu'; // One alternation over all hidden sequences
+		$wrongCharsMessages = array(); // Message key => message text with the hidden chars made visible
+		foreach ( $this->mMessages[$code] as $key => $value ) {
+			if ( isset( $this->mMessages['en'][$key] ) && preg_match( $wrongRegExp, $value ) ) {
+				foreach ( $wrongChars as $viewableChar => $hiddenChar ) {
+					$value = str_replace( $hiddenChar, $viewableChar, $value ); // Swap each hidden char for its placeholder
+				}
+				$wrongCharsMessages[$key] = $value;
+			}
+		}
+		return $wrongCharsMessages;
+	}
+
 	/**
 	 * Output a messages list.
 	 *
-- 
2.20.1