(bug 10336) Added new magic word {{REVISIONUSER}}, which displays the user name of...
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 * See docs/magicword.txt
5 *
6 * @file
7 * @ingroup Parser
8 */
9
10 /**
11 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
12 * Usage:
13 * if (MagicWord::get( 'redirect' )->match( $text ) )
14 *
15 * Possible future improvements:
16 * * Simultaneous searching for a number of magic words
17 * * MagicWord::$mObjects in shared memory
18 *
19 * Please avoid reading the data out of one of these objects and then writing
20 * special case code. If possible, add another match()-like function here.
21 *
22 * To add magic words in an extension, use the LanguageGetMagic hook. For
23 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
24 * hook. Use string keys.
25 *
26 * @ingroup Parser
27 */
class MagicWord {
	/**#@+
	 * @private
	 */
	var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
	// mVariableStartToEndRegex is declared here so that it is a real property
	// rather than one created dynamically by the constructor.
	var $mRegexStart, $mBaseRegex, $mVariableRegex, $mVariableStartToEndRegex;
	var $mModified, $mFound;

	/** True once getVariableIDs() has run the extension hooks */
	static public $mVariableIDsInitialised = false;

	/** IDs returned by getVariableIDs(); a hook may append to this list */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'displaytitle',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'newsectionlink',
		'nonewsectionlink',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'urlencode',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'language',
		'contentlanguage',
		'pagesinnamespace',
		'numberofadmins',
		'numberofviews',
		'defaultsort',
		'pagesincategory',
		'index',
		'noindex',
		'numberingroup',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs used to build the MagicWordArray in getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
	);


	/** Cache of MagicWord objects created by get(), keyed by ID */
	static public $mObjects = array();
	/** Lazily created MagicWordArray, see getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id Magic word ID
	 * @param $syn A synonym, or an array of synonyms (cast to array)
	 * @param $cs Whether matching is case-sensitive
	 */
	function __construct($id = 0, $syn = '', $cs = false) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		// All regexes are built lazily by initRegex(); '' means "not built yet"
		$this->mRegex = '';
		$this->mRegexStart = '';
		$this->mVariableRegex = '';
		$this->mVariableStartToEndRegex = '';
		$this->mModified = false;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once.
	 * @static
	 */
	static function &get( $id ) {
		wfProfileIn( __METHOD__ );
		if (!array_key_exists( $id, self::$mObjects ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		wfProfileOut( __METHOD__ );
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * On the first call the extension hooks are run, which may extend the list.
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Allow external reads of TTL array.
	 * @return The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	static function getCacheTTL($id) {
		if (array_key_exists($id,self::$mCacheTTLs)) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/** Get a MagicWordArray of double-underscore entities (created on first use) */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Initialises this object with an ID.
	 * The content language object is asked to fill in this object; if no
	 * synonyms are set afterwards, a placeholder synonym is installed and the
	 * problem is logged instead of throwing.
	 */
	function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation: builds all regex variants from the synonyms
	 * @private
	 */
	function initRegex() {
		#$variableClass = Title::legalChars();
		# This was used for matching "$1" variables, but different uses of the feature will have
		# different restrictions, which should be checked *after* the MagicWord has been matched,
		# not here. - IMSoP

		$escSyn = array();
		foreach ( $this->mSynonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * Gets a regex representing matching the word
	 */
	function getRegex() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 */
	function getRegexStart() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 */
	function getBaseRegex() {
		if ($this->mRegex == '') {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Tests whether the text contains the word.
	 * @return The raw preg_match() result: 1 on a match, 0 on no match,
	 *   false on a PCRE error. Use it in a boolean context.
	 */
	function match( $text ) {
		return preg_match( $this->getRegex(), $text );
	}

	/**
	 * Tests whether the text starts with the word.
	 * @return The raw preg_match() result: 1 on a match, 0 on no match,
	 *   false on a PCRE error. Use it in a boolean context.
	 */
	function matchStart( $text ) {
		return preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return NULL;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped. A bare array_filter() would also
		# discard "0", so a parameter of 0 (e.g. "0px") would be lost.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) == 1 ) {
			return $nonEmpty[0];
		}
		// If everything was empty there is nothing to report
		return isset( $nonEmpty[1] ) ? $nonEmpty[1] : NULL;
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word.
	 * If the regex engine reports an error, the text is left untouched and
	 * false is returned.
	 */
	function matchAndRemove( &$text ) {
		return $this->removeWithRegex( $this->getRegex(), $text );
	}

	/**
	 * Like matchAndRemove(), but only matches the word at the start of the text
	 */
	function matchStartAndRemove( &$text ) {
		return $this->removeWithRegex( $this->getRegexStart(), $text );
	}

	/**
	 * Shared implementation of matchAndRemove() and matchStartAndRemove()
	 * @private
	 */
	function removeWithRegex( $regex, &$text ) {
		$this->mFound = false;
		$stripped = preg_replace_callback( $regex, array( $this, 'pregRemoveAndRecord' ), $text );
		if ( $stripped === null ) {
			// PCRE error (for example invalid UTF-8 with the /u modifier):
			// do not overwrite the caller's text with null
			$this->mFound = false;
			return false;
		}
		$text = $stripped;
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match was seen and deletes it
	 * @private
	 **/
	function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else.
	 * The replacement is escaped, so it is inserted literally.
	 */
	function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !($res === $subject);
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !($res === $text);
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * Returns the whole synonym list
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 */
	function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacements are passed to preg_replace() as they
	 * are, so backreferences in them are interpreted.
	 *
	 * Declared static to match its documentation; it can still be called on
	 * an instance.
	 * @static
	 **/
	static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = self::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !($result === $subject);
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words.
	 * Each synonym is lower-cased by the content language and used as a key
	 * mapping to $value.
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	function getId() {
		return $this->mId;
	}
}
482
483 /**
484 * Class for handling an array of magic words
485 * @ingroup Parser
486 */
class MagicWordArray {
	/** List of magic word IDs in this array */
	var $names = array();
	/** Lazily built lookup table, see getHash() */
	var $hash;
	/** Lazily built regex fragments and full regexes, see getBaseRegex() / getRegex() */
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names Array of magic word IDs
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name.
	 * Cached hash and regexes are discarded so they get rebuilt on next use.
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name.
	 * Cached hash and regexes are discarded so they get rebuilt on next use.
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 holds the case-insensitive words (synonyms lower-cased by the
	 * content language), index 1 the case-sensitive ones; each maps
	 * synonym => magic word ID.
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns a two-element array (0: case-insensitive, 1: case-sensitive) of
	 * alternations. Every synonym becomes a named group "<index>_<ID>", which
	 * is what parseMatch() decodes.
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex.
	 * Returns a two-element array like getBaseRegex(); an element is '' when
	 * there are no words of that case sensitivity.
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables
	 * (the quoted "$1" placeholder becomes a lazy capture group)
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get an anchored regex for matching variables
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * Throws MWException if no usable named group is found.
	 */
	function parseMatch( $m ) {
		// Walk the array by position instead of with each(), which no longer
		// exists in PHP 8.
		$keys = array_keys( $m );
		$values = array_values( $m );
		$count = count( $keys );

		for ( $i = 0; $i < $count; $i++ ) {
			$key = $keys[$i];
			$value = $values[$i];
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// A named group is followed by its numeric duplicate; the slot
			// after that holds the parameter capture, if there is one.
			$paramIndex = $i + 2;
			$paramValue = $paramIndex < $count ? $values[$paramIndex] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are tried first, with the text as given
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * If the regex engine reports an error for one of the regexes, the text is
	 * left as it was for that regex rather than being replaced by null.
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			if ( preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ) ) {
				foreach ( $matches as $m ) {
					list( $name, $param ) = $this->parseMatch( $m );
					$found[$name] = $param;
				}
			}
			$stripped = preg_replace( $regex, '', $text );
			if ( $stripped !== null ) {
				$text = $stripped;
			}
		}
		return $found;
	}
}