summaryrefslogtreecommitdiff
path: root/www/wiki/includes/parser/Preprocessor.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/includes/parser/Preprocessor.php')
-rw-r--r--www/wiki/includes/parser/Preprocessor.php436
1 files changed, 436 insertions, 0 deletions
diff --git a/www/wiki/includes/parser/Preprocessor.php b/www/wiki/includes/parser/Preprocessor.php
new file mode 100644
index 00000000..49e961ae
--- /dev/null
+++ b/www/wiki/includes/parser/Preprocessor.php
@@ -0,0 +1,436 @@
+<?php
+/**
+ * Interfaces for preprocessors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Parser
+ */
+
+use MediaWiki\Logger\LoggerFactory;
+
+/**
+ * @ingroup Parser
+ */
+abstract class Preprocessor {
+
+ const CACHE_VERSION = 1;
+
+ /**
+ * @var array Brace matching rules.
+ */
+ protected $rules = [
+ '{' => [
+ 'end' => '}',
+ 'names' => [
+ 2 => 'template',
+ 3 => 'tplarg',
+ ],
+ 'min' => 2,
+ 'max' => 3,
+ ],
+ '[' => [
+ 'end' => ']',
+ 'names' => [ 2 => null ],
+ 'min' => 2,
+ 'max' => 2,
+ ],
+ '-{' => [
+ 'end' => '}-',
+ 'names' => [ 2 => null ],
+ 'min' => 2,
+ 'max' => 2,
+ ],
+ ];
+
+ /**
+ * Store a document tree in the cache.
+ *
+ * @param string $text
+ * @param int $flags
+ * @param string $tree
+ */
+ protected function cacheSetTree( $text, $flags, $tree ) {
+ $config = RequestContext::getMain()->getConfig();
+
+ $length = strlen( $text );
+ $threshold = $config->get( 'PreprocessorCacheThreshold' );
+ if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
+ return;
+ }
+
+ $cache = ObjectCache::getLocalClusterInstance();
+ $key = $cache->makeKey(
+ defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
+ md5( $text ), $flags );
+ $value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;
+
+ $cache->set( $key, $value, 86400 );
+
+ LoggerFactory::getInstance( 'Preprocessor' )
+ ->info( "Cached preprocessor output (key: $key)" );
+ }
+
+ /**
+ * Attempt to load a precomputed document tree for some given wikitext
+ * from the cache.
+ *
+ * @param string $text
+ * @param int $flags
+ * @return PPNode_Hash_Tree|bool
+ */
+ protected function cacheGetTree( $text, $flags ) {
+ $config = RequestContext::getMain()->getConfig();
+
+ $length = strlen( $text );
+ $threshold = $config->get( 'PreprocessorCacheThreshold' );
+ if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
+ return false;
+ }
+
+ $cache = ObjectCache::getLocalClusterInstance();
+
+ $key = $cache->makeKey(
+ defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
+ md5( $text ), $flags );
+
+ $value = $cache->get( $key );
+ if ( !$value ) {
+ return false;
+ }
+
+ $version = intval( substr( $value, 0, 8 ) );
+ if ( $version !== static::CACHE_VERSION ) {
+ return false;
+ }
+
+ LoggerFactory::getInstance( 'Preprocessor' )
+ ->info( "Loaded preprocessor output from cache (key: $key)" );
+
+ return substr( $value, 8 );
+ }
+
+ /**
+ * Create a new top-level frame for expansion of a page
+ *
+ * @return PPFrame
+ */
+ abstract public function newFrame();
+
+ /**
+ * Create a new custom frame for programmatic use of parameter replacement
+ * as used in some extensions.
+ *
+ * @param array $args
+ *
+ * @return PPFrame
+ */
+ abstract public function newCustomFrame( $args );
+
+ /**
+ * Create a new custom node for programmatic use of parameter replacement
+ * as used in some extensions.
+ *
+ * @param array $values
+ */
+ abstract public function newPartNodeArray( $values );
+
+ /**
+ * Preprocess text to a PPNode
+ *
+ * @param string $text
+ * @param int $flags
+ *
+ * @return PPNode
+ */
+ abstract public function preprocessToObj( $text, $flags = 0 );
+}
+
+/**
+ * @ingroup Parser
+ */
+interface PPFrame {
+ const NO_ARGS = 1;
+ const NO_TEMPLATES = 2;
+ const STRIP_COMMENTS = 4;
+ const NO_IGNORE = 8;
+ const RECOVER_COMMENTS = 16;
+ const NO_TAGS = 32;
+
+ const RECOVER_ORIG = 59; // = 1|2|8|16|32 no constant expression support in PHP yet
+
+ /** This constant exists when $indexOffset is supported in newChild() */
+ const SUPPORTS_INDEX_OFFSET = 1;
+
+ /**
+ * Create a child frame
+ *
+ * @param array|bool $args
+ * @param bool|Title $title
+ * @param int $indexOffset A number subtracted from the index attributes of the arguments
+ *
+ * @return PPFrame
+ */
+ public function newChild( $args = false, $title = false, $indexOffset = 0 );
+
+ /**
+ * Expand a document tree node, caching the result on its parent with the given key
+ * @param string|int $key
+ * @param string|PPNode $root
+ * @param int $flags
+ * @return string
+ */
+ public function cachedExpand( $key, $root, $flags = 0 );
+
+ /**
+ * Expand a document tree node
+ * @param string|PPNode $root
+ * @param int $flags
+ * @return string
+ */
+ public function expand( $root, $flags = 0 );
+
+ /**
+ * Implode with flags for expand()
+ * @param string $sep
+ * @param int $flags
+ * @param string|PPNode $args,...
+ * @return string
+ */
+ public function implodeWithFlags( $sep, $flags /*, ... */ );
+
+ /**
+ * Implode with no flags specified
+ * @param string $sep
+ * @param string|PPNode $args,...
+ * @return string
+ */
+ public function implode( $sep /*, ... */ );
+
+ /**
+ * Makes an object that, when expand()ed, will be the same as one obtained
+ * with implode()
+ * @param string $sep
+ * @param string|PPNode $args,...
+ * @return PPNode
+ */
+ public function virtualImplode( $sep /*, ... */ );
+
+ /**
+ * Virtual implode with brackets
+ * @param string $start
+ * @param string $sep
+ * @param string $end
+ * @param string|PPNode $args,...
+ * @return PPNode
+ */
+ public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );
+
+ /**
+ * Returns true if there are no arguments in this frame
+ *
+ * @return bool
+ */
+ public function isEmpty();
+
+ /**
+ * Returns all arguments of this frame
+ * @return array
+ */
+ public function getArguments();
+
+ /**
+ * Returns all numbered arguments of this frame
+ * @return array
+ */
+ public function getNumberedArguments();
+
+ /**
+ * Returns all named arguments of this frame
+ * @return array
+ */
+ public function getNamedArguments();
+
+ /**
+ * Get an argument to this frame by name
+ * @param int|string $name
+ * @return string|bool
+ */
+ public function getArgument( $name );
+
+ /**
+ * Returns true if the infinite loop check is OK, false if a loop is detected
+ *
+ * @param Title $title
+ * @return bool
+ */
+ public function loopCheck( $title );
+
+ /**
+ * Return true if the frame is a template frame
+ * @return bool
+ */
+ public function isTemplate();
+
+ /**
+ * Set the "volatile" flag.
+ *
+ * Note that this is somewhat of a "hack" in order to make extensions
+ * with side effects (such as Cite) work with the PHP parser. New
+ * extensions should be written in a way that they do not need this
+ * function, because other parsers (such as Parsoid) are not guaranteed
+ * to respect it, and it may be removed in the future.
+ *
+ * @param bool $flag
+ */
+ public function setVolatile( $flag = true );
+
+ /**
+ * Get the "volatile" flag.
+ *
+ * Callers should avoid caching the result of an expansion if it has the
+ * volatile flag set.
+ *
+ * @see self::setVolatile()
+ * @return bool
+ */
+ public function isVolatile();
+
+ /**
+ * Get the TTL of the frame's output.
+ *
+ * This is the maximum amount of time, in seconds, that this frame's
+ * output should be cached for. A value of null indicates that no
+ * maximum has been specified.
+ *
+ * Note that this TTL only applies to caching frames as parts of pages.
+ * It is not relevant to caching the entire rendered output of a page.
+ *
+ * @return int|null
+ */
+ public function getTTL();
+
+ /**
+ * Set the TTL of the output of this frame and all of its ancestors.
+ * Has no effect if the new TTL is greater than the one already set.
+ * Note that it is the caller's responsibility to change the cache
+ * expiry of the page as a whole, if such behavior is desired.
+ *
+ * @see self::getTTL()
+ * @param int $ttl
+ */
+ public function setTTL( $ttl );
+
+ /**
+ * Get a title of frame
+ *
+ * @return Title
+ */
+ public function getTitle();
+}
+
+/**
+ * There are three types of nodes:
+ * * Tree nodes, which have a name and contain other nodes as children
+ * * Array nodes, which also contain other nodes but aren't considered part of a tree
+ * * Leaf nodes, which contain the actual data
+ *
+ * This interface provides access to the tree structure and to the contents of array nodes,
+ * but it does not provide access to the internal structure of leaf nodes. Access to leaf
+ * data is provided via two means:
+ * * PPFrame::expand(), which provides expanded text
+ * * The PPNode::split*() functions, which provide metadata about certain types of tree node
+ * @ingroup Parser
+ */
+interface PPNode {
+ /**
+ * Get an array-type node containing the children of this node.
+ * Returns false if this is not a tree node.
+ * @return PPNode
+ */
+ public function getChildren();
+
+ /**
+ * Get the first child of a tree node. False if there isn't one.
+ *
+ * @return PPNode
+ */
+ public function getFirstChild();
+
+ /**
+ * Get the next sibling of any node. False if there isn't one
+ * @return PPNode
+ */
+ public function getNextSibling();
+
+ /**
+ * Get all children of this tree node which have a given name.
+ * Returns an array-type node, or false if this is not a tree node.
+ * @param string $type
+ * @return bool|PPNode
+ */
+ public function getChildrenOfType( $type );
+
+ /**
+ * Returns the length of the array, or false if this is not an array-type node
+ */
+ public function getLength();
+
+ /**
+ * Returns an item of an array-type node
+ * @param int $i
+ * @return bool|PPNode
+ */
+ public function item( $i );
+
+ /**
+ * Get the name of this node. The following names are defined here:
+ *
+ * h A heading node.
+ * template A double-brace node.
+ * tplarg A triple-brace node.
+ * title The first argument to a template or tplarg node.
+ * part Subsequent arguments to a template or tplarg node.
+ * #nodelist An array-type node
+ *
+ * The subclass may define various other names for tree and leaf nodes.
+ * @return string
+ */
+ public function getName();
+
+ /**
+ * Split a "<part>" node into an associative array containing:
+ * name PPNode name
+ * index String index
+ * value PPNode value
+ * @return array
+ */
+ public function splitArg();
+
+ /**
+ * Split an "<ext>" node into an associative array containing name, attr, inner and close
+ * All values in the resulting array are PPNodes. Inner and close are optional.
+ * @return array
+ */
+ public function splitExt();
+
+ /**
+ * Split an "<h>" node
+ * @return array
+ */
+ public function splitHeading();
+}