3 * Interfaces for preprocessors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
24 use MediaWiki\Logger\LoggerFactory
;
29 abstract class Preprocessor
{
31 const CACHE_VERSION
= 1;
34 * @var array Brace matching rules.
48 'names' => [ 2 => null ],
54 'names' => [ 2 => null ],
/**
 * Store a document tree in the cache.
 *
 * Nothing is stored when the 'PreprocessorCacheThreshold' setting is false,
 * when the text is shorter than that threshold, or when the text is longer
 * than 1e6 bytes.
 *
 * @param string $text Source text; its length decides cacheability and its
 *   md5 hash is part of the cache key
 * @param int $flags Flags value, included verbatim in the cache key
 *   (presumably the flags given to preprocessToObj() - confirm with callers)
 * @param string $tree Tree data; concatenated as a string after an 8-digit
 *   zero-padded CACHE_VERSION header
 */
protected function cacheSetTree( $text, $flags, $tree ) {
	$config = RequestContext::getMain()->getConfig();

	$length = strlen( $text );
	$threshold = $config->get( 'PreprocessorCacheThreshold' );
	if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
		// Caching is switched off, or the text is outside the cacheable
		// size range: leave the cache untouched.
		return;
	}

	$cache = ObjectCache::getLocalClusterInstance();

	// A subclass may define CACHE_PREFIX; otherwise the concrete class name
	// is used so that different implementations do not share keys.
	$prefix = defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class;
	$key = $cache->makeKey( $prefix, md5( $text ), $flags );

	// The fixed-width version header lets cacheGetTree() reject entries
	// written by an incompatible CACHE_VERSION.
	$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

	// 86400 is passed as the expiry argument (one day, if in seconds).
	$cache->set( $key, $value, 86400 );

	LoggerFactory::getInstance( 'Preprocessor' )
		->info( "Cached preprocessor output (key: $key)" );
}
/**
 * Attempt to load a precomputed document tree for some given wikitext
 * from the cache.
 *
 * The lookup is skipped under the same conditions that prevent storing:
 * the 'PreprocessorCacheThreshold' setting is false, the text is shorter
 * than that threshold, or the text is longer than 1e6 bytes.
 *
 * @param string $text Source text; its md5 hash is part of the cache key
 * @param int $flags Flags value, included verbatim in the cache key
 *   (presumably the flags given to preprocessToObj() - confirm with callers)
 * @return string|false The cached tree data with its 8-character version
 *   header removed, or false when caching does not apply, nothing usable is
 *   cached, or the entry was written for a different CACHE_VERSION
 */
protected function cacheGetTree( $text, $flags ) {
	$config = RequestContext::getMain()->getConfig();

	$length = strlen( $text );
	$threshold = $config->get( 'PreprocessorCacheThreshold' );
	if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
		// Caching is switched off, or the text is outside the cacheable
		// size range: nothing can have been stored for it.
		return false;
	}

	$cache = ObjectCache::getLocalClusterInstance();

	// Must build the key exactly as cacheSetTree() does.
	$prefix = defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class;
	$key = $cache->makeKey( $prefix, md5( $text ), $flags );

	$value = $cache->get( $key );
	if ( !is_string( $value ) || strlen( $value ) < 8 ) {
		// Cache miss, or an entry too short to hold the version header.
		return false;
	}

	$version = intval( substr( $value, 0, 8 ) );
	if ( $version !== static::CACHE_VERSION ) {
		// Entry was written for an incompatible version; treat it as a miss.
		return false;
	}

	LoggerFactory::getInstance( 'Preprocessor' )
		->info( "Loaded preprocessor output from cache (key: $key)" );

	return substr( $value, 8 );
}
128 * Create a new top-level frame for expansion of a page
132 abstract public function newFrame();
135 * Create a new custom frame for programmatic use of parameter replacement
136 * as used in some extensions.
142 abstract public function newCustomFrame( $args );
145 * Create a new custom node for programmatic use of parameter replacement
146 * as used in some extensions.
148 * @param array $values
150 abstract public function newPartNodeArray( $values );
153 * Preprocess text to a PPNode
155 * @param string $text
160 abstract public function preprocessToObj( $text, $flags = 0 );
168 const NO_TEMPLATES
= 2;
169 const STRIP_COMMENTS
= 4;
171 const RECOVER_COMMENTS
= 16;
174 const RECOVER_ORIG
= 59; // = 1|2|8|16|32, kept as a literal. NOTE(review): PHP 5.6+ allows constant expressions here; confirm the minimum supported PHP version before rewriting it in terms of the individual flag constants.
176 /** This constant exists when $indexOffset is supported in newChild() */
177 const SUPPORTS_INDEX_OFFSET
= 1;
180 * Create a child frame
182 * @param array|bool $args
183 * @param bool|Title $title
184 * @param int $indexOffset A number subtracted from the index attributes of the arguments
188 public function newChild( $args = false, $title = false, $indexOffset = 0 );
191 * Expand a document tree node, caching the result on its parent with the given key
192 * @param string|int $key
193 * @param string|PPNode $root
197 public function cachedExpand( $key, $root, $flags = 0 );
200 * Expand a document tree node
201 * @param string|PPNode $root
205 public function expand( $root, $flags = 0 );
208 * Implode with flags for expand()
211 * @param string|PPNode $args,...
214 public function implodeWithFlags( $sep, $flags /*, ... */ );
217 * Implode with no flags specified
219 * @param string|PPNode $args,...
222 public function implode( $sep /*, ... */ );
225 * Makes an object that, when expand()ed, will be the same as one obtained
228 * @param string|PPNode $args,...
231 public function virtualImplode( $sep /*, ... */ );
234 * Virtual implode with brackets
235 * @param string $start
238 * @param string|PPNode $args,...
241 public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );
244 * Returns true if there are no arguments in this frame
248 public function isEmpty();
251 * Returns all arguments of this frame
254 public function getArguments();
257 * Returns all numbered arguments of this frame
260 public function getNumberedArguments();
263 * Returns all named arguments of this frame
266 public function getNamedArguments();
269 * Get an argument to this frame by name
270 * @param int|string $name
271 * @return string|bool
273 public function getArgument( $name );
276 * Returns true if the infinite loop check is OK, false if a loop is detected
278 * @param Title $title
281 public function loopCheck( $title );
284 * Return true if the frame is a template frame
287 public function isTemplate();
290 * Set the "volatile" flag.
292 * Note that this is somewhat of a "hack" in order to make extensions
293 * with side effects (such as Cite) work with the PHP parser. New
294 * extensions should be written in a way that they do not need this
295 * function, because other parsers (such as Parsoid) are not guaranteed
296 * to respect it, and it may be removed in the future.
300 public function setVolatile( $flag = true );
303 * Get the "volatile" flag.
305 * Callers should avoid caching the result of an expansion if it has the
308 * @see self::setVolatile()
311 public function isVolatile();
314 * Get the TTL of the frame's output.
316 * This is the maximum amount of time, in seconds, that this frame's
317 * output should be cached for. A value of null indicates that no
318 * maximum has been specified.
320 * Note that this TTL only applies to caching frames as parts of pages.
321 * It is not relevant to caching the entire rendered output of a page.
325 public function getTTL();
328 * Set the TTL of the output of this frame and all of its ancestors.
329 * Has no effect if the new TTL is greater than the one already set.
330 * Note that it is the caller's responsibility to change the cache
331 * expiry of the page as a whole, if such behavior is desired.
333 * @see self::getTTL()
336 public function setTTL( $ttl );
339 * Get the title of this frame
343 public function getTitle();
347 * There are three types of nodes:
348 * * Tree nodes, which have a name and contain other nodes as children
349 * * Array nodes, which also contain other nodes but aren't considered part of a tree
350 * * Leaf nodes, which contain the actual data
352 * This interface provides access to the tree structure and to the contents of array nodes,
353 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
354 * data is provided via two means:
355 * * PPFrame::expand(), which provides expanded text
356 * * The PPNode::split*() functions, which provide metadata about certain types of tree node
361 * Get an array-type node containing the children of this node.
362 * Returns false if this is not a tree node.
365 public function getChildren();
368 * Get the first child of a tree node. False if there isn't one.
372 public function getFirstChild();
375 * Get the next sibling of any node. False if there isn't one
378 public function getNextSibling();
381 * Get all children of this tree node which have a given name.
382 * Returns an array-type node, or false if this is not a tree node.
383 * @param string $type
384 * @return bool|PPNode
386 public function getChildrenOfType( $type );
389 * Returns the length of the array, or false if this is not an array-type node
391 public function getLength();
394 * Returns an item of an array-type node
396 * @return bool|PPNode
398 public function item( $i );
401 * Get the name of this node. The following names are defined here:
404 * template A double-brace node.
405 * tplarg A triple-brace node.
406 * title The first argument to a template or tplarg node.
407 * part Subsequent arguments to a template or tplarg node.
408 * #nodelist An array-type node
410 * The subclass may define various other names for tree and leaf nodes.
413 public function getName();
416 * Split a "<part>" node into an associative array containing:
422 public function splitArg();
425 * Split an "<ext>" node into an associative array containing name, attr, inner and close
426 * All values in the resulting array are PPNodes. Inner and close are optional.
429 public function splitExt();
432 * Split an "<h>" node
435 public function splitHeading();