Return nothing on empty math tags instead of char encoding
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 * @package MediaWiki
5 * @subpackage Parser
6 */
7
8 /**
9 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
10 * Usage:
11 * if (MagicWord::get( 'redirect' )->match( $text ) )
12 *
13 * Possible future improvements:
14 * * Simultaneous searching for a number of magic words
15 * * MagicWord::$mObjects in shared memory
16 *
17 * Please avoid reading the data out of one of these objects and then writing
18 * special case code. If possible, add another match()-like function here.
19 *
20 * To add magic words in an extension, use the LanguageGetMagic hook. For
21 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
22 * hook. Use string keys.
23 *
24 * @package MediaWiki
25 */
class MagicWord {
	/**#@+
	 * @private
	 */
	// Declared public (the original used `var`), but documented as private.
	public $mId, $mSynonyms, $mCaseSensitive, $mRegex;
	public $mRegexStart, $mBaseRegex, $mVariableRegex, $mVariableStartToEndRegex;
	public $mModified, $mFound;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionyear',
		'revisiontimestamp',
		'subpagename',
		'subpagenamee',
		'displaytitle',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'rawsuffix',
		'newsectionlink',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'urlencode',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'language',
		'contentlanguage',
		'pagesinnamespace',
		'numberofadmins',
		'defaultsort',
	);

	static public $mObjects = array();

	/**#@-*/

	/**
	 * Constructor.
	 *
	 * Uses __construct rather than a method named after the class: the
	 * class-named form is deprecated in PHP 7 and is no longer treated as a
	 * constructor in PHP 8.
	 *
	 * @param mixed $id  Magic word ID
	 * @param mixed $syn Synonym, or array of synonyms; '' means "none yet"
	 * @param bool  $cs  True if the word is case sensitive
	 */
	public function __construct( $id = 0, $syn = '', $cs = false ) {
		$this->mId = $id;
		// (array)'' would give array( '' ), which is a truthy array and would
		// stop the "no synonyms" check in load() from ever firing.
		$this->mSynonyms = ( $syn === '' ) ? array() : (array)$syn;
		$this->mCaseSensitive = $cs;
		$this->mRegex = '';
		$this->mRegexStart = '';
		$this->mBaseRegex = '';
		$this->mVariableRegex = '';
		$this->mVariableStartToEndRegex = '';
		$this->mModified = false;
		$this->mFound = false;
	}

	/**
	 * Factory: returns the object for a magic word ID, creating and caching
	 * it in self::$mObjects on first use.
	 *
	 * @param string $id Magic word ID
	 * @return MagicWord
	 * @static
	 */
	public static function &get( $id ) {
		if ( !array_key_exists( $id, self::$mObjects ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 *
	 * The hooks are run only on the first call; later calls return the
	 * already-built list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				// Redefining an existing constant raises a notice/warning, so skip those.
				if ( !defined( $word ) ) {
					define( $word, $word );
				}
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Initialises this object with an ID, asking the content language object
	 * to fill in the details via getMagic().
	 *
	 * If no synonyms are set afterwards, a placeholder synonym is installed
	 * and the problem is written to the 'exception' debug log.
	 *
	 * @param string $id Magic word ID
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation: builds all regex variants from the synonyms.
	 * @private
	 */
	public function initRegex() {
		#$variableClass = Title::legalChars();
		# This was used for matching "$1" variables, but different uses of the feature will have
		# different restrictions, which should be checked *after* the MagicWord has been matched,
		# not here. - IMSoP

		$escSyn = array();
		foreach ( $this->mSynonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		// Kept from the original: the regexes are built here as a side effect.
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Tests whether the text contains the word.
	 *
	 * @param string $text
	 * @return int|false The raw preg_match() result: 1 on a match, 0 if there
	 *  is none, false on a regex error. Truthy exactly when the word is found.
	 */
	public function match( $text ) {
		return preg_match( $this->getRegex(), $text );
	}

	/**
	 * Tests whether the text starts with the word.
	 *
	 * @param string $text
	 * @return int|false The raw preg_match() result: 1 on a match, 0 if there
	 *  is none, false on a regex error. Truthy exactly when the word is found.
	 */
	public function matchStart( $text ) {
		return preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return NULL;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are discarded. A bare array_filter() would also
		# discard "0", so a variable part of "0" would be lost.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		return ( count( $nonEmpty ) == 1 ) ? $nonEmpty[0] : $nonEmpty[1];
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match happened and replaces it with nothing.
	 * @private
	 *
	 * @return string
	 **/
	public function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param string $replacement Literal replacement text (escaped before use)
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !($res === $subject);
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callback $callback
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !($res === $text);
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * Returns the whole synonym list
	 *
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Declared static to match its documentation; it uses no instance state
	 * and can still be called on an instance.
	 *
	 * NOTE: unlike replace(), the replacements are handed to preg_replace()
	 * unescaped, so "$n" and "\n" sequences in them act as backreferences.
	 *
	 * @param array $magicarr
	 * @param string $subject
	 * @param string &$result
	 * @return bool
	 * @static
	 **/
	public static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !($result === $subject);
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * @param array &$array Map to fill; keys are the synonyms passed through $wgContLang->lc()
	 * @param mixed $value Value stored under every synonym
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	/**
	 * @return bool True if the word is case sensitive
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}
}
392
393 ?>