Stray space
[lhc/web/wiklou.git] / languages / LanguageUtf8.php
1 <?php
2 #$Id$
3 if( defined( "MEDIAWIKI" ) ) {
4
# Everything handled through this language file is UTF-8, both on the
# way in and on the way out.
$wgInputEncoding = $wgOutputEncoding = "UTF-8";

# When mbstring is present, make UTF-8 its internal encoding so that
# mb_* calls which omit an explicit encoding operate on UTF-8 text.
if ( function_exists( 'mb_internal_encoding' ) ) {
	mb_internal_encoding( 'UTF-8' );
}

# The case-mapping tables are cached under keys derived from the
# database name.
$key1 = "$wgDBname:utf8:upper";
$key2 = "$wgDBname:utf8:lower";
$wikiUpperChars = $wgMemc->get( $key1 );
$wikiLowerChars = $wgMemc->get( $key2 );

# If either table came back missing or empty, load Utf8Case.php
# (presumably it defines both variables -- confirm against that file)
# and store the tables in the cache.
if ( empty( $wikiUpperChars ) || empty( $wikiLowerChars ) ) {
	require_once( "includes/Utf8Case.php" );
	$wgMemc->set( $key1, $wikiUpperChars );
	$wgMemc->set( $key2, $wikiLowerChars );
}
20
# Base stuff useful to all UTF-8 based language files.
#
# Case conversion uses the mbstring library when it is loaded, and the
# global $wikiUpperChars / $wikiLowerChars character mapping arrays
# otherwise. The mb_* calls below pass no explicit encoding, so they
# rely on mbstring's internal encoding being UTF-8.
# Language-specific character mismatches should be dealt with in the
# individual Language classes.
class LanguageUtf8 extends Language {

	# Return $string with its first character converted to upper case.
	function ucfirst( $string ) {
		if ( function_exists( 'mb_strtoupper' ) ) {
			return mb_strtoupper( mb_substr( $string, 0, 1 ) ) . mb_substr( $string, 1 );
		}
		global $wikiUpperChars;
		return $this->utf8MapFirstChar(
			$string,
			'/^(?:[a-z]|[\xc0-\xff][\x80-\xbf]*)/',
			$wikiUpperChars );
	}

	# Return $string with its first character converted to lower case.
	function lcfirst( $string ) {
		if ( function_exists( 'mb_strtolower' ) ) {
			return mb_strtolower( mb_substr( $string, 0, 1 ) ) . mb_substr( $string, 1 );
		}
		global $wikiLowerChars;
		return $this->utf8MapFirstChar(
			$string,
			'/^(?:[A-Z]|[\xc0-\xff][\x80-\xbf]*)/',
			$wikiLowerChars );
	}

	# Internal helper for ucfirst() / lcfirst() when mbstring is missing.
	#
	# If $pattern (anchored at the start) matches, the matched bytes are
	# translated through $map with strtr() and the rest of $string is
	# appended untouched. This replaces the former preg_replace() call
	# with the /e modifier, which no longer exists in PHP 7 and later.
	#
	# $string is returned unchanged when nothing matches or when $map is
	# not a non-empty array (e.g. the mapping tables failed to load).
	function utf8MapFirstChar( $string, $pattern, $map ) {
		if ( !is_array( $map ) || !$map ) {
			return $string;
		}
		$m = array();
		if ( !preg_match( $pattern, $string, $m ) ) {
			return $string;
		}
		return strtr( $m[0], $map ) . substr( $string, strlen( $m[0] ) );
	}

	# Prepare a string for the search index.
	#
	# MySQL fulltext index doesn't grok utf-8, so we need to fold
	# cases and convert every multibyte sequence to 'U8' followed by
	# the hex form of its bytes.
	#
	# With mbstring the whole string is lowercased first; without it
	# only the multibyte sequences are mapped through $wikiLowerChars
	# and single-byte characters are passed through as they are.
	#
	# In Language:: it just returns lowercase, maybe
	# all strtolower on stripped output or argument
	# should be removed and all stripForSearch
	# methods adjusted to that.
	function stripForSearch( $string ) {
		$multibyte = '/[\xc0-\xff][\x80-\xbf]*/';
		if ( function_exists( 'mb_strtolower' ) ) {
			return preg_replace_callback(
				$multibyte,
				array( $this, 'utf8HexEncodeMatch' ),
				mb_strtolower( $string ) );
		}
		return preg_replace_callback(
			$multibyte,
			array( $this, 'utf8LowerHexEncodeMatch' ),
			$string );
	}

	# preg_replace_callback() handler: 'U8' plus the hex of the match.
	function utf8HexEncodeMatch( $matches ) {
		return 'U8' . bin2hex( $matches[0] );
	}

	# preg_replace_callback() handler: map the match through
	# $wikiLowerChars (when that table is available), then hex-encode it
	# with the 'U8' prefix.
	function utf8LowerHexEncodeMatch( $matches ) {
		global $wikiLowerChars;
		$char = $matches[0];
		if ( is_array( $wikiLowerChars ) && $wikiLowerChars ) {
			$char = strtr( $char, $wikiLowerChars );
		}
		return 'U8' . bin2hex( $char );
	}

	# Name of the 8-bit encoding assumed for incoming non-UTF-8 URLs.
	#
	# Windows codepage 1252 is a superset of iso 8859-1;
	# override this to use a different source encoding to
	# translate incoming 8-bit URLs.
	function fallback8bitEncoding() {
		return "windows-1252";
	}

	# Return $s as UTF-8.
	#
	# Strings without high bytes, and strings that already look like
	# UTF-8 (1- to 4-byte sequences), are returned unchanged. Anything
	# else is converted from fallback8bitEncoding() to UTF-8 through
	# $this->iconv().
	function checkTitleEncoding( $s ) {
		# Check for non-UTF-8 URLs
		$ishigh = preg_match( '/[\x80-\xff]/', $s );
		if ( !$ishigh ) {
			return $s;
		}

		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
		if ( $isutf8 ) {
			return $s;
		}

		return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
	}

	# Return the first UTF-8 character (1 to 4 bytes) of $s, or "" when
	# $s is empty or does not start with a well-formed sequence.
	function firstChar( $s ) {
		$matches = array();
		preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches );

		return isset( $matches[1] ) ? $matches[1] : "";
	}

	# Crop a string from the beginning or end to a certain number of bytes.
	# (Bytes are used because our storage has limited byte lengths for some
	# columns in the database.) Only whole characters are kept: a multibyte
	# character cut by the byte limit is dropped entirely.
	#
	# $length does not include the optional ellipsis.
	# If $length is negative, snip from the beginning.
	# If $length is 0, only the ellipsis is returned.
	# A string that already fits is returned as is, without ellipsis.
	function truncate( $string, $length, $ellipsis = "" ) {
		if ( $length == 0 ) {
			return $ellipsis;
		}
		if ( strlen( $string ) <= abs( $length ) ) {
			return $string;
		}

		if ( $length > 0 ) {
			$string = substr( $string, 0, $length );
			$char = ord( $string[strlen( $string ) - 1] );
			$m = array();
			if ( $char >= 0xc0 ) {
				# We got the first byte only of a multibyte char; remove it.
				$string = substr( $string, 0, -1 );
			} elseif ( $char >= 0x80 &&
				preg_match( '/(?:[\xe0-\xef][\x80-\xbf]|' .
					'[\xf0-\xf7][\x80-\xbf]{1,2})$/D', $string, $m ) ) {
				# We chopped in the middle of a 3- or 4-byte character;
				# remove the partial sequence. The pattern is anchored at
				# the end only, so newlines earlier in the string cannot
				# prevent the match.
				$string = substr( $string, 0, -strlen( $m[0] ) );
			}
			return $string . $ellipsis;
		}

		$string = substr( $string, $length );
		$char = ord( $string[0] );
		if ( $char >= 0x80 && $char < 0xc0 ) {
			# We chopped in the middle of a character; remove the whole thing
			$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
		}
		return $ellipsis . $string;
	}
}
141
142 } # ifdef MEDIAWIKI
143
144 ?>