Remove support for the deprecated MagicWordMagicWords hook, which is no longer used in trunk.
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 *
 * Usage:
 *     if ( MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * Example:
 *
 *     $magicWords = array();
 *
 *     $magicWords['en'] = array(
 *         'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *         'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 *     );
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	/* Set to true once the MagicWordwgVariableIDs hook has been run */
	static public $mVariableIDsInitialised = false;

	/* Built-in parser variable IDs; the hook may append to this list */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/* Cache of MagicWord objects created by get(), keyed by magic word ID */
	static public $mObjects = array();

	/* Lazily created by getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn string|array Synonym or list of synonyms (cast to array)
	 * @param $cs bool Whether matching is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are created once per ID and cached in self::$mObjects.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The MagicWordwgVariableIDs hook is run on the first call only, with the
	 * static list passed by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Key into self::$mCacheTTLs
	 * @return int The configured TTL value, or -1 if there is no entry for $id
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		// NOTE(review): getMagic() presumably fills in mSynonyms and
		// mCaseSensitive for this ID -- confirm against the Language class.
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Unknown ID: fall back to a junk placeholder synonym and log the
			// problem instead of throwing.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all the regexes from the synonyms.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned each "$1" placeholder into "\$1"; replace it
		// with a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used
	 * if one is using MagicWord::getBaseRegex(), otherwise it'll be included
	 * in the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only genuinely empty strings are dropped: a bare array_filter() would
		# also discard a legitimate "0" value.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) == 1 ) {
			return $nonEmpty[0];
		}
		return $nonEmpty[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the input
	 * string, removing that leading occurrence
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a match
	 * was seen and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !( $res === $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !( $res === $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * as-is, without escaping.
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !( $result === $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms passed through
	 * $wgContLang->lc().
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID this object was constructed or loaded with
	 */
	public function getId() {
		return $this->mId;
	}
}
614
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** List of magic word names (IDs) in this array */
	public $names = array();
	/** Lazily built by getHash(); reset to null whenever names are added */
	public $hash;
	/** Lazily built by getBaseRegex() / getRegex(); reset when names are added */
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array List of magic word names
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 *
	 * Index 0 maps synonyms of case-insensitive words (passed through
	 * $wgContLang->lc()) to the word name; index 1 maps synonyms of
	 * case-sensitive words, unchanged, to the word name.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 *
	 * Returns a two-element array: index 0 is the alternation for
	 * case-insensitive words, index 1 the one for case-sensitive words.
	 * Every synonym becomes a named group "<synonym index>_<word name>", which
	 * is what parseMatch() later decodes.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an entry
	 *   is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with each quoted "$1"
	 *   placeholder replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException if no usable named group is found in $m
	 */
	public function parseMatch( $m ) {
		// Walk the array with its internal pointer rather than each(), which
		// no longer exists in PHP 8. After reading an element the pointer is
		// advanced straight away, exactly as each() used to do.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', (string)$key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric duplicate of the named group;
			// the element after it is the parameter capture, if there is one.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Exact (case-sensitive) lookup first
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false Magic word name as decoded by parseMatch(), or false
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}