summary | refs | log | tree | commit | diff
path: root/www/wiki/includes/MagicWord.php
diff options
context:
space:
mode:
author Yaco <franco@reevo.org> 2020-06-04 11:01:00 -0300
committer Yaco <franco@reevo.org> 2020-06-04 11:01:00 -0300
commit fc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
tree daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/MagicWord.php
first commit
Diffstat (limited to 'www/wiki/includes/MagicWord.php')
-rw-r--r-- www/wiki/includes/MagicWord.php 695
1 files changed, 695 insertions, 0 deletions
diff --git a/www/wiki/includes/MagicWord.php b/www/wiki/includes/MagicWord.php
new file mode 100644
index 00000000..93c8a71c
--- /dev/null
+++ b/www/wiki/includes/MagicWord.php
@@ -0,0 +1,695 @@
+<?php
+/**
+ * See docs/magicword.txt.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Parser
+ */
+
+/**
+ * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
+ *
+ * @par Usage:
+ * @code
+ * if (MagicWord::get( 'redirect' )->match( $text ) ) {
+ * // some code
+ * }
+ * @endcode
+ *
+ * Possible future improvements:
+ * * Simultaneous searching for a number of magic words
+ * * MagicWord::$mObjects in shared memory
+ *
+ * Please avoid reading the data out of one of these objects and then writing
+ * special case code. If possible, add another match()-like function here.
+ *
+ * To add magic words in an extension, use $magicWords in a file listed in
+ * $wgExtensionMessagesFiles[].
+ *
+ * @par Example:
+ * @code
+ * $magicWords = [];
+ *
+ * $magicWords['en'] = [
+ * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
+ * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
+ * ];
+ * @endcode
+ *
+ * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
+ * hook. Use string keys.
+ *
+ * @ingroup Parser
+ */
+class MagicWord {
+    /** @var string Internal name (ID) of the magic word */
+    public $mId;
+
+    /** @var string[] Every spelling under which the magic word is recognised */
+    public $mSynonyms;
+
+    /** @var bool Whether matching is case sensitive */
+    public $mCaseSensitive;
+
+    /** @var string Delimited regex matching any synonym anywhere; '' until initRegex() has run */
+    private $mRegex = '';
+
+    /** @var string Delimited regex matching any synonym at the start of the subject */
+    private $mRegexStart = '';
+
+    /** @var string Delimited regex matching any synonym against the whole subject */
+    private $mRegexStartToEnd = '';
+
+    /** @var string Alternation of the quoted synonyms, without delimiters or modifiers */
+    private $mBaseRegex = '';
+
+    /** @var string Same as $mRegex, with every "$1" placeholder turned into a capture group */
+    private $mVariableRegex = '';
+
+    /** @var string Same as $mRegexStartToEnd, with every "$1" placeholder turned into a capture group */
+    private $mVariableStartToEndRegex = '';
+
+    /** @var bool Result flag of the last replace() / substituteCallback() call */
+    private $mModified = false;
+
+    /** @var bool Set by pregRemoveAndRecord() whenever a match is removed */
+    private $mFound = false;
+
+    /** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
+    public static $mVariableIDsInitialised = false;
+
+    /** @var string[] */
+    public static $mVariableIDs = [
+        '!',
+        'currentmonth',
+        'currentmonth1',
+        'currentmonthname',
+        'currentmonthnamegen',
+        'currentmonthabbrev',
+        'currentday',
+        'currentday2',
+        'currentdayname',
+        'currentyear',
+        'currenttime',
+        'currenthour',
+        'localmonth',
+        'localmonth1',
+        'localmonthname',
+        'localmonthnamegen',
+        'localmonthabbrev',
+        'localday',
+        'localday2',
+        'localdayname',
+        'localyear',
+        'localtime',
+        'localhour',
+        'numberofarticles',
+        'numberoffiles',
+        'numberofedits',
+        'articlepath',
+        'pageid',
+        'sitename',
+        'server',
+        'servername',
+        'scriptpath',
+        'stylepath',
+        'pagename',
+        'pagenamee',
+        'fullpagename',
+        'fullpagenamee',
+        'namespace',
+        'namespacee',
+        'namespacenumber',
+        'currentweek',
+        'currentdow',
+        'localweek',
+        'localdow',
+        'revisionid',
+        'revisionday',
+        'revisionday2',
+        'revisionmonth',
+        'revisionmonth1',
+        'revisionyear',
+        'revisiontimestamp',
+        'revisionuser',
+        'revisionsize',
+        'subpagename',
+        'subpagenamee',
+        'talkspace',
+        'talkspacee',
+        'subjectspace',
+        'subjectspacee',
+        'talkpagename',
+        'talkpagenamee',
+        'subjectpagename',
+        'subjectpagenamee',
+        'numberofusers',
+        'numberofactiveusers',
+        'numberofpages',
+        'currentversion',
+        'rootpagename',
+        'rootpagenamee',
+        'basepagename',
+        'basepagenamee',
+        'currenttimestamp',
+        'localtimestamp',
+        'directionmark',
+        'contentlanguage',
+        'pagelanguage',
+        'numberofadmins',
+        'cascadingsources',
+    ];
+
+    /** Array of caching hints for ParserCache
+     * @var array [ string => int ]
+     */
+    public static $mCacheTTLs = [
+        'currentmonth' => 86400,
+        'currentmonth1' => 86400,
+        'currentmonthname' => 86400,
+        'currentmonthnamegen' => 86400,
+        'currentmonthabbrev' => 86400,
+        'currentday' => 3600,
+        'currentday2' => 3600,
+        'currentdayname' => 3600,
+        'currentyear' => 86400,
+        'currenttime' => 3600,
+        'currenthour' => 3600,
+        'localmonth' => 86400,
+        'localmonth1' => 86400,
+        'localmonthname' => 86400,
+        'localmonthnamegen' => 86400,
+        'localmonthabbrev' => 86400,
+        'localday' => 3600,
+        'localday2' => 3600,
+        'localdayname' => 3600,
+        'localyear' => 86400,
+        'localtime' => 3600,
+        'localhour' => 3600,
+        'numberofarticles' => 3600,
+        'numberoffiles' => 3600,
+        'numberofedits' => 3600,
+        'currentweek' => 3600,
+        'currentdow' => 3600,
+        'localweek' => 3600,
+        'localdow' => 3600,
+        'numberofusers' => 3600,
+        'numberofactiveusers' => 3600,
+        'numberofpages' => 3600,
+        'currentversion' => 86400,
+        'currenttimestamp' => 3600,
+        'localtimestamp' => 3600,
+        'pagesinnamespace' => 3600,
+        'numberofadmins' => 3600,
+        'numberingroup' => 3600,
+    ];
+
+    /** @var string[] */
+    public static $mDoubleUnderscoreIDs = [
+        'notoc',
+        'nogallery',
+        'forcetoc',
+        'toc',
+        'noeditsection',
+        'newsectionlink',
+        'nonewsectionlink',
+        'hiddencat',
+        'index',
+        'noindex',
+        'staticredirect',
+        'notitleconvert',
+        'nocontentconvert',
+    ];
+
+    /** @var string[] */
+    public static $mSubstIDs = [
+        'subst',
+        'safesubst',
+    ];
+
+    /** @var array [ string => MagicWord ] Objects already built by get(), keyed by ID */
+    public static $mObjects = [];
+
+    /** @var MagicWordArray|null Built on first use by getDoubleUnderscoreArray() */
+    public static $mDoubleUnderscoreArray = null;
+
+    /**
+     * Create a new MagicWord object
+     *
+     * Use factory instead: MagicWord::get
+     *
+     * @param string $id The internal name of the magic word
+     * @param string[]|string $syn synonyms for the magic word; a single
+     *  string is wrapped into a one-element array
+     * @param bool $cs If magic word is case sensitive
+     */
+    public function __construct( $id = null, $syn = [], $cs = false ) {
+        $this->mId = $id;
+        $this->mSynonyms = (array)$syn;
+        $this->mCaseSensitive = $cs;
+    }
+
+    /**
+     * Factory: creates an object representing an ID
+     *
+     * The object is built and loaded on the first request for an ID and
+     * served from self::$mObjects afterwards.
+     *
+     * @param string $id The internal name of the magic word
+     *
+     * @return MagicWord
+     */
+    public static function &get( $id ) {
+        if ( !isset( self::$mObjects[$id] ) ) {
+            $mw = new MagicWord();
+            $mw->load( $id );
+            self::$mObjects[$id] = $mw;
+        }
+        return self::$mObjects[$id];
+    }
+
+    /**
+     * Get an array of parser variable IDs
+     *
+     * The MagicWordwgVariableIDs hook is run once, on the first call, and
+     * receives the ID list by reference.
+     *
+     * @return string[]
+     */
+    public static function getVariableIDs() {
+        if ( !self::$mVariableIDsInitialised ) {
+            # Get variable IDs
+            Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
+            self::$mVariableIDsInitialised = true;
+        }
+        return self::$mVariableIDs;
+    }
+
+    /**
+     * Get an array of parser substitution modifier IDs
+     * @return string[]
+     */
+    public static function getSubstIDs() {
+        return self::$mSubstIDs;
+    }
+
+    /**
+     * Allow external reads of TTL array
+     *
+     * @param string $id
+     * @return int The TTL registered for $id in self::$mCacheTTLs, or -1
+     *  when the ID has no entry
+     */
+    public static function getCacheTTL( $id ) {
+        if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
+            return self::$mCacheTTLs[$id];
+        }
+        return -1;
+    }
+
+    /**
+     * Get a MagicWordArray of double-underscore entities
+     *
+     * Built on the first call, after the GetDoubleUnderscoreIDs hook has
+     * received the ID list by reference.
+     *
+     * @return MagicWordArray
+     */
+    public static function getDoubleUnderscoreArray() {
+        if ( self::$mDoubleUnderscoreArray === null ) {
+            Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
+            self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
+        }
+        return self::$mDoubleUnderscoreArray;
+    }
+
+    /**
+     * Clear the self::$mObjects variable
+     * For use in parser tests
+     */
+    public static function clearCache() {
+        self::$mObjects = [];
+    }
+
+    /**
+     * Initialises this object with an ID
+     *
+     * The object is handed to $wgContLang->getMagic(); if no synonyms are
+     * set afterwards the ID is treated as invalid.
+     *
+     * @param string $id
+     * @throws MWException If no synonyms are known for $id
+     */
+    public function load( $id ) {
+        global $wgContLang;
+        $this->mId = $id;
+        $wgContLang->getMagic( $this );
+        if ( !$this->mSynonyms ) {
+            // Placeholder synonym assigned before throwing, so the object
+            // does not keep an empty synonym list.
+            $this->mSynonyms = [ 'brionmademeputthishere' ];
+            throw new MWException( "Error: invalid magic word '$id'" );
+        }
+    }
+
+    /**
+     * Preliminary initialisation: builds every regex variant from the
+     * current synonym list and case-sensitivity flag.
+     * @private
+     */
+    public function initRegex() {
+        // Sort the synonyms by length, descending, so that the longest synonym
+        // matches in precedence to the shortest
+        $synonyms = $this->mSynonyms;
+        usort( $synonyms, [ $this, 'compareStringLength' ] );
+
+        $escSyn = [];
+        foreach ( $synonyms as $synonym ) {
+            // In case a magic word contains /, like that's going to happen;)
+            $escSyn[] = preg_quote( $synonym, '/' );
+        }
+        $this->mBaseRegex = implode( '|', $escSyn );
+
+        $case = $this->mCaseSensitive ? '' : 'iu';
+        $this->mRegex = "/{$this->mBaseRegex}/{$case}";
+        $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
+        $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
+
+        // preg_quote() escaped each "$1" placeholder to "\$1"; swap those for
+        // a lazy capture group to obtain the variable-matching variants.
+        $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
+        $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $this->mRegexStartToEnd );
+    }
+
+    /**
+     * Builds the regexes on first use.
+     *
+     * $mRegex always contains at least its delimiters once initRegex() has
+     * run, so an empty value reliably means "not built yet".
+     */
+    private function ensureRegexInitialised() {
+        if ( $this->mRegex === '' ) {
+            $this->initRegex();
+        }
+    }
+
+    /**
+     * A comparison function that returns -1, 0 or 1 depending on whether the
+     * first string is longer, the same length or shorter than the second
+     * string.
+     *
+     * @param string $s1
+     * @param string $s2
+     *
+     * @return int
+     */
+    public function compareStringLength( $s1, $s2 ) {
+        $l1 = strlen( $s1 );
+        $l2 = strlen( $s2 );
+        if ( $l1 < $l2 ) {
+            return 1;
+        } elseif ( $l1 > $l2 ) {
+            return -1;
+        } else {
+            return 0;
+        }
+    }
+
+    /**
+     * Gets a regex representing matching the word
+     *
+     * @return string
+     */
+    public function getRegex() {
+        $this->ensureRegexInitialised();
+        return $this->mRegex;
+    }
+
+    /**
+     * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
+     * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
+     * the complete expression
+     *
+     * @return string 'iu' for case-insensitive words, '' otherwise
+     */
+    public function getRegexCase() {
+        $this->ensureRegexInitialised();
+
+        return $this->mCaseSensitive ? '' : 'iu';
+    }
+
+    /**
+     * Gets a regex matching the word, if it is at the string start
+     *
+     * @return string
+     */
+    public function getRegexStart() {
+        $this->ensureRegexInitialised();
+        return $this->mRegexStart;
+    }
+
+    /**
+     * Gets a regex matching the word from start to end of a string
+     *
+     * @return string
+     * @since 1.23
+     */
+    public function getRegexStartToEnd() {
+        $this->ensureRegexInitialised();
+        return $this->mRegexStartToEnd;
+    }
+
+    /**
+     * regex without the slashes and what not
+     *
+     * @return string
+     */
+    public function getBaseRegex() {
+        $this->ensureRegexInitialised();
+        return $this->mBaseRegex;
+    }
+
+    /**
+     * Returns true if the text contains the word
+     *
+     * @param string $text
+     *
+     * @return bool
+     */
+    public function match( $text ) {
+        return (bool)preg_match( $this->getRegex(), $text );
+    }
+
+    /**
+     * Returns true if the text starts with the word
+     *
+     * @param string $text
+     *
+     * @return bool
+     */
+    public function matchStart( $text ) {
+        return (bool)preg_match( $this->getRegexStart(), $text );
+    }
+
+    /**
+     * Returns true if the text matched the word
+     *
+     * @param string $text
+     *
+     * @return bool
+     * @since 1.23
+     */
+    public function matchStartToEnd( $text ) {
+        return (bool)preg_match( $this->getRegexStartToEnd(), $text );
+    }
+
+    /**
+     * Returns NULL if there's no match, the value of $1 otherwise
+     * The return code is the matched string, if there's no variable
+     * part in the regex and the matched variable part ($1) if there
+     * is one.
+     *
+     * @param string $text
+     *
+     * @return string|null
+     */
+    public function matchVariableStartToEnd( $text ) {
+        $matches = [];
+        if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
+            return null;
+        }
+
+        # multiple matched parts (variable match); some will be empty because of
+        # synonyms. The variable will be the second non-empty one so remove any
+        # blank elements and re-sort the indices.
+        # See also T8526
+        #
+        # Only genuinely empty strings are dropped: array_filter() without a
+        # callback would also discard a captured "0", because "0" is falsy.
+        $matches = array_values( array_filter(
+            $matches,
+            function ( $part ) {
+                return $part !== '';
+            }
+        ) );
+
+        if ( count( $matches ) == 1 ) {
+            return $matches[0];
+        }
+        return $matches[1];
+    }
+
+    /**
+     * Returns true if the text matches the word, and alters the
+     * input string, removing all instances of the word
+     *
+     * If the regex engine reports an error, $text is left untouched and
+     * false is returned.
+     *
+     * @param string &$text
+     *
+     * @return bool
+     */
+    public function matchAndRemove( &$text ) {
+        return $this->removeMatches( $this->getRegex(), $text );
+    }
+
+    /**
+     * Returns true if the text starts with the word, and alters the input
+     * string, removing that leading occurrence
+     *
+     * If the regex engine reports an error, $text is left untouched and
+     * false is returned.
+     *
+     * @param string &$text
+     * @return bool
+     */
+    public function matchStartAndRemove( &$text ) {
+        return $this->removeMatches( $this->getRegexStart(), $text );
+    }
+
+    /**
+     * Strips every match of $regex from $text and reports whether anything
+     * was removed.
+     *
+     * @param string $regex Delimited regular expression
+     * @param string &$text Subject, modified in place on success
+     *
+     * @return bool
+     */
+    private function removeMatches( $regex, &$text ) {
+        $this->mFound = false;
+        $stripped = preg_replace_callback(
+            $regex,
+            [ $this, 'pregRemoveAndRecord' ],
+            $text
+        );
+
+        if ( $stripped === null ) {
+            // preg_replace_callback() returns null on a PCRE error (for
+            // instance invalid UTF-8 under the "u" modifier). Keep the
+            // caller's text instead of overwriting it with null.
+            $this->mFound = false;
+            return false;
+        }
+
+        $text = $stripped;
+        return $this->mFound;
+    }
+
+    /**
+     * Used in matchAndRemove()
+     *
+     * Records that a match was seen and replaces it with nothing.
+     *
+     * @return string
+     */
+    public function pregRemoveAndRecord() {
+        $this->mFound = true;
+        return '';
+    }
+
+    /**
+     * Replaces the word with something else
+     *
+     * The replacement is passed through StringUtils::escapeRegexReplacement()
+     * before being handed to preg_replace().
+     *
+     * @param string $replacement
+     * @param string $subject
+     * @param int $limit
+     *
+     * @return string
+     */
+    public function replace( $replacement, $subject, $limit = -1 ) {
+        $res = preg_replace(
+            $this->getRegex(),
+            StringUtils::escapeRegexReplacement( $replacement ),
+            $subject,
+            $limit
+        );
+        $this->mModified = $res !== $subject;
+        return $res;
+    }
+
+    /**
+     * Variable handling: {{SUBST:xxx}} style words
+     * Calls back a function to determine what to replace xxx with
+     * Input word must contain $1
+     *
+     * @param string $text
+     * @param callable $callback
+     *
+     * @return string
+     */
+    public function substituteCallback( $text, $callback ) {
+        $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
+        $this->mModified = $res !== $text;
+        return $res;
+    }
+
+    /**
+     * Matches the word, where $1 is a wildcard
+     *
+     * @return string
+     */
+    public function getVariableRegex() {
+        $this->ensureRegexInitialised();
+        return $this->mVariableRegex;
+    }
+
+    /**
+     * Matches the entire string, where $1 is a wildcard
+     *
+     * @return string
+     */
+    public function getVariableStartToEndRegex() {
+        $this->ensureRegexInitialised();
+        return $this->mVariableStartToEndRegex;
+    }
+
+    /**
+     * Accesses the synonym list directly
+     *
+     * @param int $i
+     *
+     * @return string
+     */
+    public function getSynonym( $i ) {
+        return $this->mSynonyms[$i];
+    }
+
+    /**
+     * @return string[]
+     */
+    public function getSynonyms() {
+        return $this->mSynonyms;
+    }
+
+    /**
+     * Returns true if the last call to replace() or substituteCallback()
+     * returned a modified text, otherwise false.
+     *
+     * @return bool
+     */
+    public function getWasModified() {
+        return $this->mModified;
+    }
+
+    /**
+     * Adds all the synonyms of this MagicWord to an array, to allow quick
+     * lookup in a list of magic words
+     *
+     * Each synonym is lower-cased with $wgContLang->lc() and used as the key.
+     *
+     * @param string[] &$array
+     * @param string $value
+     */
+    public function addToArray( &$array, $value ) {
+        global $wgContLang;
+        foreach ( $this->mSynonyms as $syn ) {
+            $array[$wgContLang->lc( $syn )] = $value;
+        }
+    }
+
+    /**
+     * @return bool
+     */
+    public function isCaseSensitive() {
+        return $this->mCaseSensitive;
+    }
+
+    /**
+     * @return string
+     */
+    public function getId() {
+        return $this->mId;
+    }
+}