Don't allow high control chars in titles on non-utf8 wikis
[lhc/web/wiklou.git] / includes / Title.php
1 <?
2 # See title.doc
3 global $IP;
4 include_once( "$IP/Interwiki.php" );
5
# A page title, broken into interwiki prefix, namespace number, main
# part and fragment. The main part is kept in three spellings that
# secureAndSplit() derives together: DB key form (underscores), text
# form (spaces) and URL form (run through wfUrlencode()).
#
class Title {
	/* private */ var $mTextform, $mUrlform, $mDbkeyform;
	/* private */ var $mNamespace, $mInterwiki, $mFragment;
	/* private */ var $mArticleID, $mRestrictions, $mRestrictionsLoaded;

	# Builds an empty title in namespace 0. The factory methods below
	# create one of these and then fill it in via secureAndSplit().
	#
	/* private */ function Title()
	{
		$this->mInterwiki = $this->mUrlform =
			$this->mTextform = $this->mDbkeyform = "";
		# Initialised with the other string members so getFragment()
		# returns "" rather than null before secureAndSplit() has run.
		$this->mFragment = "";
		# -1 means "not looked up yet"; see getArticleID().
		$this->mArticleID = -1;
		$this->mNamespace = 0;
		$this->mRestrictionsLoaded = false;
		$this->mRestrictions = array();
	}

	# Static factory methods
	#

	# Creates a title from a string that is already in DB key form
	# (underscores instead of spaces); it is still normalised by
	# secureAndSplit().
	#
	function newFromDBkey( $key )
	{
		$t = new Title();
		$t->mDbkeyform = $key;
		$t->secureAndSplit();
		return $t;
	}

	# Creates a title from link text: named HTML entities are turned
	# back into characters, the result is passed through
	# wfMungeToUtf8() and urldecode(), and spaces become underscores
	# before normalisation.
	#
	function newFromText( $text )
	{
		# Note - mixing latin1 named entities and unicode numbered
		# ones will result in a bad link.
		$entityToChar = array_flip( get_html_translation_table( HTML_ENTITIES ) );
		$text = strtr( $text, $entityToChar );

		$text = wfMungeToUtf8( $text );

		$text = urldecode( $text );

		$t = new Title();
		$t->mDbkeyform = str_replace( " ", "_", $text );
		$t->secureAndSplit();
		return $t;
	}

	# Creates a title from the title part of a request URL. When the
	# referrer does not start with $wgServer the string is first
	# handed to $wgLang->checkTitleEncoding().
	#
	function newFromURL( $url )
	{
		global $wgLang, $wgServer, $HTTP_SERVER_VARS;

		$t = new Title();
		$s = urldecode( $url ); # This is technically wrong, as anything
								# we've gotten is already decoded by PHP.
								# Kept for backwards compatibility with
								# buggy URLs we had for a while...

		# A request may carry no Referer header at all; read it
		# defensively so that case does not raise an undefined-index
		# notice. An empty referrer compares exactly as the missing
		# value did.
		$referer = isset( $HTTP_SERVER_VARS["HTTP_REFERER"] )
			? $HTTP_SERVER_VARS["HTTP_REFERER"]
			: "";

		# For links that came from outside, check for alternate/legacy
		# character encoding.
		if( strncmp( $wgServer, $referer, strlen( $wgServer ) ) )
			$s = $wgLang->checkTitleEncoding( $s );

		$t->mDbkeyform = str_replace( " ", "_", $s );
		$t->secureAndSplit();
		return $t;
	}

	# Returns the characters allowed in a title, written as the inside
	# of a regex character class (secureAndSplit() wraps it in
	# "/[^...]/"). The set depends on $wgInputEncoding.
	#
	function legalChars()
	{
		global $wgInputEncoding;
		if( $wgInputEncoding == "utf-8" ) {
			return "-,.()' &;%!?_0-9A-Za-z\\/:\\x80-\\xFF";
		} else {
			# ISO 8859-* don't allow 0x80-0x9F
			return "-,.()' &;%!?_0-9A-Za-z\\/:\\xA0-\\xFF";
		}
	}

	# Returns the $wgValidInterwikis entry for the given prefix, or ""
	# when the prefix is not a known interwiki.
	#
	function getInterwikiLink( $key )
	{
		global $wgValidInterwikis;

		if ( ! array_key_exists( $key, $wgValidInterwikis ) ) {
			return "";
		}
		return $wgValidInterwikis[$key];
	}

	# Simple accessors
	#
	function getText() { return $this->mTextform; }
	function getURL() { return $this->mUrlform; }
	function getDBkey() { return $this->mDbkeyform; }
	function getNamespace() { return $this->mNamespace; }
	function setNamespace( $n ) { $this->mNamespace = $n; }
	function getInterwiki() { return $this->mInterwiki; }
	function getFragment() { return $this->mFragment; }

	# Reduces a title to the lower-cased, space-separated word list
	# used for the search index.
	#   $ns     namespace number of the title
	#   $title  title text without namespace prefix
	#
	/* static */ function indexTitle( $ns, $title )
	{
		global $wgLang;

		$lc = SearchEngine::legalSearchChars() . "&#;";
		$t = $wgLang->stripForSearch( $title );
		# Anything outside the searchable set becomes a word break.
		$t = preg_replace( "/[^{$lc}]+/", " ", $t );
		$t = strtolower( $t );

		# Handle 's, s'
		$t = preg_replace( "/([{$lc}]+)'s( |$)/", "\\1 \\1's ", $t );
		$t = preg_replace( "/([{$lc}]+)s'( |$)/", "\\1s ", $t );

		$t = preg_replace( "/\\s+/", " ", $t );

		# For image titles, drop a trailing file-extension word.
		if ( $ns == Namespace::getImage() ) {
			$t = preg_replace( "/ (png|gif|jpg|jpeg|ogg)$/", "", $t );
		}
		return trim( $t );
	}

	# Search-index form of this title; see indexTitle().
	#
	function getIndexTitle()
	{
		return Title::indexTitle( $this->mNamespace, $this->mTextform );
	}

	# Joins a namespace number and a title into "Nstext:title", or
	# returns the bare title when the namespace has no text.
	#
	/* static */ function makeName( $ns, $title )
	{
		global $wgLang;

		$n = $wgLang->getNsText( $ns );
		if ( "" == $n ) {
			return $title;
		}
		return "{$n}:{$title}";
	}

	# DB key with interwiki and namespace prefixes, using underscores.
	#
	function getPrefixedDBkey()
	{
		$s = $this->prefix( $this->mDbkeyform );
		return str_replace( " ", "_", $s );
	}

	# Text form with interwiki and namespace prefixes, using spaces.
	#
	function getPrefixedText()
	{
		$s = $this->prefix( $this->mTextform );
		return str_replace( "_", " ", $s );
	}

	# Prefixed DB key, urlencode()d, with ":", "/", "(" and ")"
	# turned back into literal characters.
	#
	function getPrefixedURL()
	{
		$s = $this->prefix( $this->mDbkeyform );
		$s = str_replace( " ", "_", $s );

		$s = urlencode ( $s ) ;
		# Cleaning up URL to make it look nice -- is this safe?
		$s = preg_replace( "/%3[Aa]/", ":", $s );
		$s = preg_replace( "/%2[Ff]/", "/", $s );
		$s = str_replace( "%28", "(", $s );
		$s = str_replace( "%29", ")", $s );
		return $s;
	}

	# Builds the URL of the page by substituting "Nstext:urlform" for
	# "$1" in $wgArticlePath, or in the interwiki's pattern from
	# $wgValidInterwikis for an interwiki title, and appending the
	# fragment if there is one.
	#
	function getFullURL()
	{
		global $wgLang, $wgArticlePath, $wgValidInterwikis;

		if ( "" == $this->mInterwiki ) {
			$p = $wgArticlePath;
		} else {
			$p = $wgValidInterwikis[$this->mInterwiki];
		}
		$n = $wgLang->getNsText( $this->mNamespace );
		if ( "" != $n ) { $n .= ":"; }
		$u = str_replace( "$1", $n . $this->mUrlform, $p );
		if ( "" != $this->mFragment ) {
			$u .= "#" . $this->mFragment;
		}
		return $u;
	}

	# URL of the edit form for this page, or "" for interwiki titles.
	#
	function getEditURL()
	{
		if ( "" != $this->mInterwiki ) { return ""; }
		return wfLocalUrl( $this->getPrefixedURL(), "action=edit" );
	}

	# True when the title carries an interwiki prefix.
	#
	function isExternal() { return ( "" != $this->mInterwiki ); }

	# True for any title in namespace -1, and for pages whose
	# restriction list contains "sysop".
	#
	function isProtected()
	{
		if ( -1 == $this->mNamespace ) { return true; }
		$a = $this->getRestrictions();
		if ( in_array( "sysop", $a ) ) { return true; }
		return false;
	}

	# True when the title is in the namespace returned by
	# Namespace::getWikipedia() and its DB key equals the
	# "uploadlogpage" or "dellogpage" message.
	#
	function isLog()
	{
		if ( $this->mNamespace != Namespace::getWikipedia() ) {
			return false;
		}
		if ( ( 0 == strcmp( wfMsg( "uploadlogpage" ), $this->mDbkeyform ) ) ||
		  ( 0 == strcmp( wfMsg( "dellogpage" ), $this->mDbkeyform ) ) ) {
			return true;
		}
		return false;
	}

	# Whether $wgUser is watching this page. Always false for
	# namespace -1 and for a user whose getID() is 0.
	#
	function userIsWatching()
	{
		global $wgUser;

		if ( -1 == $this->mNamespace ) { return false; }
		if ( 0 == $wgUser->getID() ) { return false; }

		return $wgUser->isWatched( $this );
	}

	# Whether $wgUser may edit this page: never in namespace -1 or for
	# the placeholder title "_"; otherwise every non-empty restriction
	# on the page must be among the user's rights.
	#
	function userCanEdit()
	{
		global $wgUser;

		if ( -1 == $this->mNamespace ) { return false; }
		# if ( 0 == $this->getArticleID() ) { return false; }
		if ( $this->mDbkeyform == "_" ) { return false; }

		$ur = $wgUser->getRights();
		foreach ( $this->getRestrictions() as $r ) {
			if ( "" != $r && ( ! in_array( $r, $ur ) ) ) {
				return false;
			}
		}
		return true;
	}

	# Returns the page's restrictions as an array of strings, read
	# once from cur.cur_restrictions (a comma-separated list) and
	# cached. A page with article ID 0 has no restrictions. An empty
	# database value yields array( "" ), which callers must tolerate.
	#
	function getRestrictions()
	{
		$id = $this->getArticleID();
		if ( 0 == $id ) { return array(); }

		if ( ! $this->mRestrictionsLoaded ) {
			$res = wfGetSQL( "cur", "cur_restrictions", "cur_id=$id" );
			$this->mRestrictions = explode( ",", trim( $res ) );
			$this->mRestrictionsLoaded = true;
		}
		return $this->mRestrictions;
	}

	# Returns the article ID, asking $wgLinkCache->addLink() the first
	# time and remembering the answer. secureAndSplit() presets 0 for
	# titles it already knows are not in the database.
	#
	function getArticleID()
	{
		global $wgLinkCache;

		if ( -1 != $this->mArticleID ) { return $this->mArticleID; }
		$this->mArticleID = $wgLinkCache->addLink(
		  $this->getPrefixedDBkey() );
		return $this->mArticleID;
	}

	# Replaces the cached article ID and drops the cached
	# restrictions. Passing 0 resets the ID to "not looked up yet",
	# so the next getArticleID() consults the link cache again.
	#
	function resetArticleID( $newid )
	{
		global $wgLinkCache;
		$wgLinkCache->clearBadLink( $this->getPrefixedDBkey() );

		if ( 0 == $newid ) { $this->mArticleID = -1; }
		else { $this->mArticleID = $newid; }
		$this->mRestrictionsLoaded = false;
		$this->mRestrictions = array();
	}

	# Prepends "interwiki:" (if any) and "Nstext:" (for any namespace
	# other than 0) to the given name.
	#
	/* private */ function prefix( $name )
	{
		global $wgLang;

		$p = "";
		if ( "" != $this->mInterwiki ) {
			$p = $this->mInterwiki . ":";
		}
		if ( 0 != $this->mNamespace ) {
			$p .= $wgLang->getNsText( $this->mNamespace ) . ":";
		}
		return $p . $name;
	}

	# Assumes that mDbkeyform has been set, and is urldecoded
	# and uses underscores, but not otherwise munged. This function
	# removes illegal characters, splits off the interwiki and
	# namespace prefixes, sets the other forms, and canonicalizes
	# everything. This one function is really at the core of
	# Wiki--don't mess with it unless you're really sure you know
	# what you're doing.
	#
	/* private */ function secureAndSplit()
	{
		global $wgLang, $wgValidInterwikis, $wgLocalInterwiki;

		$this->mInterwiki = $this->mFragment = "";
		$this->mNamespace = 0;

		# Collapse every run of whitespace/underscores to a single
		# underscore, then drop the one that may remain at either end.
		# trim() is safe on an empty string, unlike indexing $t{0}.
		$t = preg_replace( "/[\\s_]+/", "_", $this->mDbkeyform );
		$t = trim( $t, "_" );
		if ( "" == $t ) { $t = "_"; }

		$this->mDbkeyform = $t;
		$done = false;

		# Both prefix matches below use this pattern: "word:rest".
		$prefixRegex = "/^([A-Za-z0-9_\\x80-\\xff]+):(.*)$/";

		# ":Image:Foo" is treated like "Image:Foo"; any other leading
		# colon suppresses prefix parsing altogether.
		$imgpre = ":" . $wgLang->getNsText( Namespace::getImage() ) . ":";
		if ( 0 == strncasecmp( $imgpre, $t, strlen( $imgpre ) ) ) {
			$t = substr( $t, 1 );
		}
		if ( ":" == substr( $t, 0, 1 ) ) {
			$r = substr( $t, 1 );
		} else {
			if ( preg_match( $prefixRegex, $t, $m ) ) {
				#$p = strtolower( $m[1] );
				$p = $m[1];
				if ( array_key_exists( $p, $wgValidInterwikis ) ) {
					$t = $m[2];
					$this->mInterwiki = $p;

					# Look for a namespace prefix after the interwiki.
					if ( preg_match( $prefixRegex, $t, $m ) ) {
						$p = strtolower( $m[1] );
					} else {
						$done = true;
					}
					# Namespaces are only resolved for the local wiki.
					if( $this->mInterwiki != $wgLocalInterwiki )
						$done = true;
				}
				if ( ! $done ) {
					if ( $ns = $wgLang->getNsIndex( str_replace( " ", "_", $p ) ) ) {
						$t = $m[2];
						$this->mNamespace = $ns;
					}
				}
			}
			$r = $t;
		}
		# A link to the local interwiki prefix is just a local link.
		if ( 0 == strcmp( $this->mInterwiki, $wgLocalInterwiki ) ) {
			$this->mInterwiki = "";
		}
		# We already know that some pages won't be in the database!
		#
		if ( "" != $this->mInterwiki || -1 == $this->mNamespace ) {
			$this->mArticleID = 0;
		}
		# Split off everything from the first "#" as the fragment.
		$f = strstr( $r, "#" );
		if ( false !== $f ) {
			$this->mFragment = substr( $f, 1 );
			$r = substr( $r, 0, strlen( $r ) - strlen( $f ) );
		}
		# Strip illegal characters.
		#
		$tc = Title::legalChars();
		$t = preg_replace( "/[^{$tc}]/", "", $r );

		# Only local titles get their first letter capitalised.
		if( $this->mInterwiki == "") $t = $wgLang->ucfirst( $t );
		$this->mDbkeyform = $t;
		$this->mUrlform = wfUrlencode( $t );
		$this->mTextform = str_replace( "_", " ", $t );
	}
}
376 ?>