4 * See docs/magicword.txt.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
25 use MediaWiki\Logger\LoggerFactory
;
26 use MediaWiki\MediaWikiServices
;
29 * Class for handling an array of magic words
32 class MagicWordArray
{
36 /** @var MagicWordFactory */
/**
 * Create a magic word array from a list of magic word names.
 *
 * @param array $names Names (IDs) of the magic words to start with
 * @param MagicWordFactory|null $factory Factory used to look up magic words;
 *   when none is given, the one provided by MediaWikiServices is used
 */
public function __construct( $names = [], MagicWordFactory $factory = null ) {
	$this->names = $names;
	// Only reach for the global service container when nothing was injected
	$this->factory = $factory ?: MediaWikiServices::getInstance()->getMagicWordFactory();
}
/**
 * Add a magic word by name
 *
 * @param string $name
 */
public function add( $name ) {
	$this->names[] = $name;

	// Everything derived from the name list is now stale; clear it so it is
	// rebuilt the next time it is requested
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Add a number of magic words by name
 *
 * @param array $names Magic word names; the keys of this array are discarded
 */
public function addArray( $names ) {
	$additional = array_values( $names );
	$this->names = array_merge( $this->names, $additional );

	// Everything derived from the name list is now stale; clear it so it is
	// rebuilt the next time it is requested
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Get a 2-d hashtable for this array
 *
 * The first dimension is the case-sensitivity flag (0 = case-insensitive,
 * 1 = case-sensitive); the second maps a synonym to its magic word name.
 * Synonyms of case-insensitive words are stored lowercased using the
 * factory's content language. The table is built on first use and cached.
 *
 * @return array
 */
public function getHash() {
	if ( $this->hash !== null ) {
		return $this->hash;
	}

	$this->hash = [ 0 => [], 1 => [] ];
	foreach ( $this->names as $name ) {
		$word = $this->factory->get( $name );
		$caseIndex = (int)$word->isCaseSensitive();
		foreach ( $word->getSynonyms() as $synonym ) {
			// Case-insensitive lookups are done on the lowercased text, so
			// the stored key has to be lowercased too
			$hashKey = $caseIndex
				? $synonym
				: $this->factory->getContentLanguage()->lc( $synonym );
			$this->hash[$caseIndex][$hashKey] = $name;
		}
	}

	return $this->hash;
}
/**
 * Get the base regex
 *
 * Builds one alternation per case-sensitivity class: index 0 holds the
 * case-insensitive magic words, index 1 the case-sensitive ones. Every
 * synonym becomes a named group whose name is the synonym index (digits
 * mapped to the letters a-j), an underscore, and the magic word name.
 * An entry is '' when there are no synonyms of that class.
 *
 * The result is cached, but only once it has been built completely: if an
 * exception interrupts the build, nothing is cached and the next call
 * starts over instead of returning a half-built regex.
 *
 * @throws MWException If two synonyms would produce the same group name
 * @return string[]
 */
public function getBaseRegex() {
	if ( $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	// Work on locals and publish to $this->baseRegex at the very end, so a
	// throw below cannot leave a partial result in the cache
	$groupsByCase = [ 0 => [], 1 => [] ];
	$seenGroupNames = [];

	foreach ( $this->names as $name ) {
		$magic = $this->factory->get( $name );
		$case = intval( $magic->isCaseSensitive() );
		foreach ( $magic->getSynonyms() as $i => $syn ) {
			// Group name must start with a non-digit in PCRE 8.34+
			$it = strtr( $i, '0123456789', 'abcdefghij' );
			$groupName = $it . '_' . $name;
			// look for same group names to avoid same named subpatterns in the regex
			if ( isset( $seenGroupNames[$groupName] ) ) {
				throw new MWException(
					__METHOD__ . ': duplicate internal name in magic word array: ' . $name
				);
			}
			$seenGroupNames[$groupName] = true;
			$groupsByCase[$case][] = '(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
		}
	}

	$this->baseRegex = [
		0 => implode( '|', $groupsByCase[0] ),
		1 => implode( '|', $groupsByCase[1] ),
	];

	return $this->baseRegex;
}
/**
 * Get an unanchored regex that does not match parameters
 *
 * The result is cached after the first call.
 *
 * @return string[] Index 0 is the case-insensitive regex, index 1 the
 *   case-sensitive one; an entry is '' when the matching base regex is empty
 */
public function getRegex() {
	if ( $this->regex !== null ) {
		return $this->regex;
	}

	$base = $this->getBaseRegex();
	$this->regex = [
		$base[0] === '' ? '' : "/{$base[0]}/iuS",
		$base[1] === '' ? '' : "/{$base[1]}/S",
	];

	return $this->regex;
}
/**
 * Get a regex for matching variables with parameters
 *
 * Takes the result of getRegex() and replaces every escaped "$1"
 * placeholder in it with the lazy capture group "(.*?)".
 *
 * @return string[] Same layout as the result of getRegex()
 */
public function getVariableRegex() {
	$withoutParams = $this->getRegex();

	return str_replace( "\\$1", "(.*?)", $withoutParams );
}
/**
 * Get a regex anchored to the start of the string that does not match parameters
 *
 * @return string[] Index 0 is the case-insensitive regex, index 1 the
 *   case-sensitive one; an entry is '' when the matching base regex is empty
 */
public function getRegexStart() {
	$base = $this->getBaseRegex();

	return [
		$base[0] === '' ? '' : "/^(?:{$base[0]})/iuS",
		$base[1] === '' ? '' : "/^(?:{$base[1]})/S",
	];
}
/**
 * Get an anchored regex for matching variables with parameters
 *
 * The base regex is anchored at both ends and every escaped "$1"
 * placeholder in it is replaced with the lazy capture group "(.*?)".
 *
 * @return string[] Index 0 is the case-insensitive regex, index 1 the
 *   case-sensitive one; an entry is '' when the matching base regex is empty
 */
public function getVariableStartToEndRegex() {
	$base = $this->getBaseRegex();

	return [
		$base[0] === ''
			? ''
			: str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" ),
		$base[1] === ''
			? ''
			: str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" ),
	];
}
198 public function getNames() {
/**
 * Parse a match array from preg_match
 * Returns array(magic word ID, parameter value)
 * If there is no parameter value, that element will be false.
 *
 * The first entry with a non-zero key and a non-empty value is taken as the
 * matched group; its key is expected to have the form "<prefix>_<name>".
 *
 * @param array $m Match array as filled in by preg_match
 *
 * @throws MWException If the matched key has no "_" separator, or if no
 *   matched group is found at all
 * @return array
 */
public function parseMatch( $m ) {
	$keys = array_keys( $m );
	$values = array_values( $m );
	$total = count( $keys );

	for ( $pos = 0; $pos < $total; $pos++ ) {
		$key = $keys[$pos];
		// Skip the whole-match entry and any empty capture
		if ( $key === 0 || $values[$pos] === '' ) {
			continue;
		}

		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		$magicName = $parts[1];

		// The parameter is read from the entry two positions after the
		// matched group; false when the array ends before that
		$paramPos = $pos + 2;
		$paramValue = $paramPos < $total ? $values[$paramPos] : false;

		return [ $magicName, $paramValue ];
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__ . ': parameter not found' );
}
/**
 * Match some text, with parameter capture
 * Returns an array with the magic word name in the first element and the
 * parameter in the second element.
 * Both elements are false if there was no match.
 *
 * @param string $text
 *
 * @return array
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $pattern ) {
		// An empty pattern means there is nothing to try for this case class
		if ( $pattern === '' ) {
			continue;
		}

		$captures = [];
		if ( preg_match( $pattern, $text, $captures ) ) {
			return $this->parseMatch( $captures );
		}
	}

	return [ false, false ];
}
/**
 * Match some text, without parameter capture
 * Returns the magic word name, or false if there was no capture
 *
 * The text is first looked up as-is among the case-sensitive synonyms, then
 * lowercased (content language) among the case-insensitive ones.
 *
 * @param string $text
 *
 * @return string|bool False on failure
 */
public function matchStartToEnd( $text ) {
	$table = $this->getHash();

	$caseSensitive = $table[1];
	if ( isset( $caseSensitive[$text] ) ) {
		return $caseSensitive[$text];
	}

	$lowered = $this->factory->getContentLanguage()->lc( $text );
	$caseInsensitive = $table[0];

	return isset( $caseInsensitive[$lowered] ) ? $caseInsensitive[$lowered] : false;
}
/**
 * Returns an associative array, ID => param value, for all items that match
 * Removes the matched items from the input string (passed by reference)
 *
 * If a regex operation fails, a warning is logged to the 'parser' channel.
 * When the removal itself fails, $text is left untouched for that pattern
 * rather than being overwritten with null.
 *
 * @param string &$text
 *
 * @return array
 */
public function matchAndRemove( &$text ) {
	$found = [];
	$regexes = $this->getRegex();
	foreach ( $regexes as $regex ) {
		// An empty pattern means there is nothing to match for this case class
		if ( $regex === '' ) {
			continue;
		}

		$matches = [];
		$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
		if ( $res === false ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
		} elseif ( $res ) {
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
		}

		$res = preg_replace( $regex, '', $text );
		if ( $res === null ) {
			LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
				'code' => preg_last_error(),
				'regex' => $regex,
				'text' => $text,
			] );
			// preg_replace signals an error with null; assigning that to the
			// by-reference argument would wipe the caller's text
			continue;
		}
		$text = $res;
	}

	return $found;
}
320 * Return the ID of the magic word at the start of $text, and remove
321 * the prefix from $text.
322 * Return false if no match found and $text is not modified.
323 * Does not match parameters.
325 * @param string &$text
327 * @return string|bool Magic word ID (its name) on success, false on failure
329 public function matchStartAndRemove( &$text ) {
330 $regexes = $this->getRegexStart();
331 foreach ( $regexes as $regex ) {
332 if ( $regex === '' ) {
335 if ( preg_match( $regex, $text, $m ) ) {
336 list( $id, ) = $this->parseMatch( $m );
337 if ( strlen( $m[0] ) >= strlen( $text ) ) {
340 $text = substr( $text, strlen( $m[0] ) );