summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/AbuseFilter/includes/parser
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/AbuseFilter/includes/parser')
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPData.php497
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPException.php4
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPParserState.php10
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPToken.php61
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php126
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php611
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php40
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php279
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php1560
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php258
10 files changed, 3446 insertions, 0 deletions
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPData.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPData.php
new file mode 100644
index 00000000..ff1faa98
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPData.php
@@ -0,0 +1,497 @@
+<?php
+
+class AFPData {
+ // Datatypes
+ const DINT = 'int';
+ const DSTRING = 'string';
+ const DNULL = 'null';
+ const DBOOL = 'bool';
+ const DFLOAT = 'float';
+ const DLIST = 'list';
+
+ // Translation table mapping shell-style wildcards to PCRE equivalents.
+ // Derived from <http://www.php.net/manual/en/function.fnmatch.php#100207>
+ private static $wildcardMap = [
+ '\*' => '.*',
+ '\+' => '\+',
+ '\-' => '\-',
+ '\.' => '\.',
+ '\?' => '.',
+ '\[' => '[',
+ '\[\!' => '[^',
+ '\\' => '\\\\',
+ '\]' => ']',
+ ];
+
+ public $type;
+ public $data;
+
+ /**
+  * Wraps a raw PHP value together with the AbuseFilter datatype it represents.
+  *
+  * @param string $type One of the self::D* datatype constants
+  * @param mixed $val Raw value to store; it is not checked against $type
+  */
+ public function __construct( $type = self::DNULL, $val = null ) {
+ 	$this->data = $val;
+ 	$this->type = $type;
+ }
+
+ /**
+  * Recursively wraps a native PHP value in the matching AFPData type.
+  *
+  * Arrays become DLIST values whose elements are converted one by one
+  * (array keys are discarded); null becomes a DNULL value.
+  *
+  * @param mixed $var
+  * @return AFPData
+  * @throws AFPException If $var has a type with no AbuseFilter equivalent
+  */
+ public static function newFromPHPVar( $var ) {
+ 	if ( is_string( $var ) ) {
+ 		return new AFPData( self::DSTRING, $var );
+ 	}
+ 	if ( is_int( $var ) ) {
+ 		return new AFPData( self::DINT, $var );
+ 	}
+ 	if ( is_float( $var ) ) {
+ 		return new AFPData( self::DFLOAT, $var );
+ 	}
+ 	if ( is_bool( $var ) ) {
+ 		return new AFPData( self::DBOOL, $var );
+ 	}
+ 	if ( is_array( $var ) ) {
+ 		$converted = [];
+ 		foreach ( $var as $element ) {
+ 			$converted[] = self::newFromPHPVar( $element );
+ 		}
+
+ 		return new AFPData( self::DLIST, $converted );
+ 	}
+ 	if ( is_null( $var ) ) {
+ 		return new AFPData();
+ 	}
+
+ 	throw new AFPException(
+ 		'Data type ' . gettype( $var ) . ' is not supported by AbuseFilter'
+ 	);
+ }
+
+ /**
+  * Creates a new AFPData object carrying the same type and value as this one.
+  *
+  * @return AFPData
+  */
+ public function dup() {
+ 	$copy = new AFPData( $this->type, $this->data );
+
+ 	return $copy;
+ }
+
+ /**
+  * Converts a value to another AbuseFilter datatype.
+  *
+  * A value that already has the target type is simply duplicated. Lists are
+  * converted to bool/float/int through their element count, and to a string
+  * by concatenating the string form of every element, each followed by a
+  * newline. Any scalar cast to a list becomes a one-element list.
+  *
+  * @param AFPData $orig Value to convert
+  * @param string $target One of the self::D* datatype constants
+  * @return AFPData
+  * @throws AFPException If $target is not a known datatype
+  */
+ public static function castTypes( $orig, $target ) {
+ 	if ( $orig->type == $target ) {
+ 		return $orig->dup();
+ 	}
+ 	if ( $target == self::DNULL ) {
+ 		return new AFPData();
+ 	}
+
+ 	if ( $orig->type == self::DLIST ) {
+ 		// Numeric and boolean casts of a list are based on its length.
+ 		$length = count( $orig->data );
+ 		if ( $target == self::DBOOL ) {
+ 			return new AFPData( self::DBOOL, (bool)$length );
+ 		}
+ 		if ( $target == self::DFLOAT ) {
+ 			return new AFPData( self::DFLOAT, floatval( $length ) );
+ 		}
+ 		if ( $target == self::DINT ) {
+ 			return new AFPData( self::DINT, $length );
+ 		}
+ 		if ( $target == self::DSTRING ) {
+ 			$s = '';
+ 			foreach ( $orig->data as $item ) {
+ 				$s .= $item->toString() . "\n";
+ 			}
+
+ 			return new AFPData( self::DSTRING, $s );
+ 		}
+ 	}
+
+ 	if ( $target == self::DBOOL ) {
+ 		return new AFPData( self::DBOOL, (bool)$orig->data );
+ 	}
+ 	if ( $target == self::DFLOAT ) {
+ 		return new AFPData( self::DFLOAT, floatval( $orig->data ) );
+ 	}
+ 	if ( $target == self::DINT ) {
+ 		return new AFPData( self::DINT, intval( $orig->data ) );
+ 	}
+ 	if ( $target == self::DSTRING ) {
+ 		return new AFPData( self::DSTRING, strval( $orig->data ) );
+ 	}
+ 	if ( $target == self::DLIST ) {
+ 		return new AFPData( self::DLIST, [ $orig ] );
+ 	}
+
+ 	// Previously this fell off the end and returned null, which made the
+ 	// callers fail later when reading ->data. Fail here, with a clear message.
+ 	throw new AFPException( "Cannot cast {$orig->type} to unknown type {$target}" );
+ }
+
+ /**
+  * Logical negation: casts the value to bool and returns the opposite.
+  *
+  * @param AFPData $value
+  * @return AFPData A DBOOL value
+  */
+ public static function boolInvert( $value ) {
+ 	$negated = !$value->toBool();
+
+ 	return new AFPData( self::DBOOL, $negated );
+ }
+
+ /**
+  * Raises $base to the power of $exponent.
+  *
+  * @param AFPData $base
+  * @param AFPData $exponent
+  * @return AFPData DINT when PHP's pow() yields an integer, DFLOAT otherwise
+  */
+ public static function pow( $base, $exponent ) {
+ 	$power = pow( $base->toNumber(), $exponent->toNumber() );
+ 	// The strict comparison only holds when $power already is an integer.
+ 	$type = $power === (int)$power ? self::DINT : self::DFLOAT;
+
+ 	return new AFPData( $type, $power );
+ }
+
+ /**
+  * Implements "a in b": whether the string form of $a occurs inside the
+  * string form of $b. An empty string on either side never matches.
+  *
+  * @param AFPData $a Needle
+  * @param AFPData $b Haystack
+  * @return AFPData A DBOOL value
+  */
+ public static function keywordIn( $a, $b ) {
+ 	$needle = $a->toString();
+ 	$haystack = $b->toString();
+
+ 	$found = $needle != '' && $haystack != '' && strpos( $haystack, $needle ) !== false;
+
+ 	return new AFPData( self::DBOOL, $found );
+ }
+
+ /**
+  * Implements "a contains b": whether the string form of $b occurs inside
+  * the string form of $a. An empty string on either side never matches.
+  *
+  * @param AFPData $a Haystack
+  * @param AFPData $b Needle
+  * @return AFPData A DBOOL value
+  */
+ public static function keywordContains( $a, $b ) {
+ 	$haystack = $a->toString();
+ 	$needle = $b->toString();
+
+ 	$found = $haystack != '' && $needle != '' && strpos( $haystack, $needle ) !== false;
+
+ 	return new AFPData( self::DBOOL, $found );
+ }
+
+ /**
+  * Checks whether a list holds an element equal to $value, using the
+  * non-strict comparison of self::equals().
+  *
+  * @param AFPData $value Value to look for
+  * @param AFPData $list Value whose ->data is iterated over
+  * @return bool
+  */
+ public static function listContains( $value, $list ) {
+ 	// Should use built-in PHP function somehow
+ 	$found = false;
+ 	foreach ( $list->data as $candidate ) {
+ 		if ( self::equals( $value, $candidate ) ) {
+ 			$found = true;
+ 			break;
+ 		}
+ 	}
+
+ 	return $found;
+ }
+
+ /**
+  * Compares two values for equality.
+  *
+  * Two non-list values are equal when their string forms are identical (and,
+  * in strict mode, their types match). Two lists are equal when they have the
+  * same length and all elements at the same offsets are equal. A list is
+  * never equal to a non-list.
+  *
+  * @ToDo Should we also build a proper system to compare arrays with different types?
+  * @param AFPData $d1
+  * @param AFPData $d2
+  * @param bool $strict whether to also check types
+  * @return bool
+  */
+ public static function equals( $d1, $d2, $strict = false ) {
+ 	$firstIsList = $d1->type == self::DLIST;
+ 	$secondIsList = $d2->type == self::DLIST;
+
+ 	if ( !$firstIsList && !$secondIsList ) {
+ 		if ( $strict && $d1->type != $d2->type ) {
+ 			return false;
+ 		}
+
+ 		return $d1->toString() === $d2->toString();
+ 	}
+
+ 	if ( !$firstIsList || !$secondIsList ) {
+ 		// Trying to compare an array to something else
+ 		return false;
+ 	}
+
+ 	$left = $d1->data;
+ 	$right = $d2->data;
+ 	$length = count( $left );
+ 	if ( $length !== count( $right ) ) {
+ 		return false;
+ 	}
+
+ 	for ( $offset = 0; $offset < $length; $offset++ ) {
+ 		if ( !self::equals( $left[$offset], $right[$offset], $strict ) ) {
+ 			return false;
+ 		}
+ 	}
+
+ 	return true;
+ }
+
+ /**
+  * Implements the "like" operator: matches a string against a shell-style
+  * wildcard pattern, which is translated to a PCRE via self::$wildcardMap.
+  *
+  * A failing preg_match() call is reported as "no match"; warnings are
+  * suppressed while it runs.
+  *
+  * @param AFPData $str Subject string
+  * @param AFPData $pattern Wildcard pattern
+  * @return AFPData A DBOOL value
+  */
+ public static function keywordLike( $str, $pattern ) {
+ 	$subject = $str->toString();
+ 	$quoted = preg_quote( $pattern->toString(), '#' );
+ 	$regex = '#^' . strtr( $quoted, self::$wildcardMap ) . '$#u';
+
+ 	Wikimedia\suppressWarnings();
+ 	$matched = preg_match( $regex, $subject );
+ 	Wikimedia\restoreWarnings();
+
+ 	return new AFPData( self::DBOOL, (bool)$matched );
+ }
+
+ /**
+  * Implements the regex operators: matches a string against a user-supplied
+  * regular expression, always in UTF-8 mode.
+  *
+  * Slashes in the expression are escaped so that "/" can serve as delimiter.
+  *
+  * @param AFPData $str Subject string
+  * @param AFPData $regex Regular expression without delimiters
+  * @param int $pos Position in the filter code, used for error reporting
+  * @param bool $insensitive Whether to match case-insensitively
+  * @return AFPData A DBOOL value
+  * @throws AFPUserVisibleException If preg_match() reports a failure
+  */
+ public static function keywordRegex( $str, $regex, $pos, $insensitive = false ) {
+ 	$subject = $str->toString();
+ 	$escaped = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $regex->toString() );
+ 	$pattern = "/$escaped/u" . ( $insensitive ? 'i' : '' );
+
+ 	Wikimedia\suppressWarnings();
+ 	$matched = preg_match( $pattern, $subject );
+ 	Wikimedia\restoreWarnings();
+
+ 	if ( $matched === false ) {
+ 		throw new AFPUserVisibleException(
+ 			'regexfailure',
+ 			$pos,
+ 			[ 'unspecified error in preg_match()', $pattern ]
+ 		);
+ 	}
+
+ 	return new AFPData( self::DBOOL, (bool)$matched );
+ }
+
+ /**
+  * Case-insensitive variant of self::keywordRegex().
+  *
+  * @param AFPData $str Subject string
+  * @param AFPData $regex Regular expression without delimiters
+  * @param int $pos Position in the filter code, used for error reporting
+  * @return AFPData A DBOOL value
+  */
+ public static function keywordRegexInsensitive( $str, $regex, $pos ) {
+ 	$insensitive = true;
+
+ 	return self::keywordRegex( $str, $regex, $pos, $insensitive );
+ }
+
+ /**
+  * Arithmetic negation. Integers are negated as integers; every other type
+  * is negated through its float form. The result keeps the type tag of the
+  * operand.
+  *
+  * @param AFPData $data
+  * @return AFPData
+  */
+ public static function unaryMinus( $data ) {
+ 	$negated = $data->type == self::DINT
+ 		? -$data->toInt()
+ 		: -$data->toFloat();
+
+ 	return new AFPData( $data->type, $negated );
+ }
+
+ /**
+  * Applies a binary logic operator to the boolean forms of two values.
+  *
+  * @param AFPData $a Left operand
+  * @param AFPData $b Right operand
+  * @param string $op One of '|' (or), '&' (and), '^' (xor)
+  * @return AFPData A DBOOL value
+  * @throws AFPException If $op is not a known operator
+  */
+ public static function boolOp( $a, $b, $op ) {
+ 	$left = $a->toBool();
+ 	$right = $b->toBool();
+
+ 	switch ( $op ) {
+ 		case '|':
+ 			return new AFPData( self::DBOOL, $left || $right );
+ 		case '&':
+ 			return new AFPData( self::DBOOL, $left && $right );
+ 		case '^':
+ 			return new AFPData( self::DBOOL, $left xor $right );
+ 		default:
+ 			// Should never happen.
+ 			throw new AFPException( "Invalid boolean operation: {$op}" );
+ 	}
+ }
+
+ /**
+  * Applies a comparison operator to two values.
+  *
+  * The equality operators delegate to self::equals() ('===' and '!==' in
+  * strict mode). The ordering operators compare the string forms of both
+  * operands using PHP's comparison operators.
+  *
+  * @param AFPData $a Left operand
+  * @param AFPData $b Right operand
+  * @param string $op One of '==', '=', '!=', '===', '!==', '>', '<', '>=', '<='
+  * @return AFPData A DBOOL value
+  * @throws AFPException If $op is not a known operator
+  */
+ public static function compareOp( $a, $b, $op ) {
+ 	switch ( $op ) {
+ 		case '==':
+ 		case '=':
+ 			return new AFPData( self::DBOOL, self::equals( $a, $b ) );
+ 		case '!=':
+ 			return new AFPData( self::DBOOL, !self::equals( $a, $b ) );
+ 		case '===':
+ 			return new AFPData( self::DBOOL, self::equals( $a, $b, true ) );
+ 		case '!==':
+ 			return new AFPData( self::DBOOL, !self::equals( $a, $b, true ) );
+ 	}
+
+ 	$left = $a->toString();
+ 	$right = $b->toString();
+
+ 	switch ( $op ) {
+ 		case '>':
+ 			$outcome = $left > $right;
+ 			break;
+ 		case '<':
+ 			$outcome = $left < $right;
+ 			break;
+ 		case '>=':
+ 			$outcome = $left >= $right;
+ 			break;
+ 		case '<=':
+ 			$outcome = $left <= $right;
+ 			break;
+ 		default:
+ 			// Should never happen
+ 			throw new AFPException( "Invalid comparison operation: {$op}" );
+ 	}
+
+ 	return new AFPData( self::DBOOL, $outcome );
+ }
+
+ /**
+  * Applies a multiplication-related operator ('*', '/' or '%') to the
+  * numeric forms of two values.
+  *
+  * The modulo operator works on integers, so both operands are truncated
+  * before it is applied.
+  *
+  * @param AFPData $a Left operand
+  * @param AFPData $b Right operand
+  * @param string $op One of '*', '/', '%'
+  * @param int $pos Position in the filter code, used for error reporting
+  * @return AFPData DINT when the result is an integer, DFLOAT otherwise
+  * @throws AFPUserVisibleException On division or modulo by zero
+  * @throws AFPException If $op is not a known operator
+  */
+ public static function mulRel( $a, $b, $op, $pos ) {
+ 	$a = $a->toNumber();
+ 	$b = $b->toNumber();
+
+ 	// For '%' the divisor is truncated to an integer, so a value such as 0.5
+ 	// is effectively zero too. Without this check PHP raises its own
+ 	// "modulo by zero" error instead of the user-visible exception.
+ 	$zeroDivisor = $b == 0 || ( $op == '%' && (int)$b === 0 );
+ 	if ( $op != '*' && $zeroDivisor ) {
+ 		throw new AFPUserVisibleException( 'dividebyzero', $pos, [ $a ] );
+ 	}
+
+ 	if ( $op == '*' ) {
+ 		$data = $a * $b;
+ 	} elseif ( $op == '/' ) {
+ 		$data = $a / $b;
+ 	} elseif ( $op == '%' ) {
+ 		// Explicit casts document the truncation that '%' performs anyway.
+ 		$data = (int)$a % (int)$b;
+ 	} else {
+ 		// Should never happen
+ 		throw new AFPException( "Invalid multiplication-related operation: {$op}" );
+ 	}
+
+ 	if ( is_int( $data ) ) {
+ 		$type = self::DINT;
+ 	} else {
+ 		$data = floatval( $data );
+ 		$type = self::DFLOAT;
+ 	}
+
+ 	return new AFPData( $type, $data );
+ }
+
+ /**
+  * Implements the '+' operator.
+  *
+  * If either operand is a string the result is the concatenation of both
+  * string forms; two lists are merged; anything else is added numerically.
+  *
+  * @param AFPData $a Left operand
+  * @param AFPData $b Right operand
+  * @return AFPData
+  */
+ public static function sum( $a, $b ) {
+ 	$hasString = $a->type == self::DSTRING || $b->type == self::DSTRING;
+ 	if ( $hasString ) {
+ 		return new AFPData( self::DSTRING, $a->toString() . $b->toString() );
+ 	}
+
+ 	if ( $a->type == self::DLIST && $b->type == self::DLIST ) {
+ 		return new AFPData( self::DLIST, array_merge( $a->toList(), $b->toList() ) );
+ 	}
+
+ 	$total = $a->toNumber() + $b->toNumber();
+ 	$type = $total === (int)$total ? self::DINT : self::DFLOAT;
+
+ 	return new AFPData( $type, $total );
+ }
+
+ /**
+  * Implements the binary '-' operator on the numeric forms of two values.
+  *
+  * @param AFPData $a Minuend
+  * @param AFPData $b Subtrahend
+  * @return AFPData DINT when the difference is an integer, DFLOAT otherwise
+  */
+ public static function sub( $a, $b ) {
+ 	$difference = $a->toNumber() - $b->toNumber();
+ 	$type = $difference === (int)$difference ? self::DINT : self::DFLOAT;
+
+ 	return new AFPData( $type, $difference );
+ }
+
+ /** Convert shorteners */
+
+ /**
+  * Converts this value to the corresponding native PHP value. Lists are
+  * converted recursively into plain arrays.
+  *
+  * @throws MWException If the stored type is not one of the known datatypes
+  * @return mixed
+  */
+ public function toNative() {
+ 	$type = $this->type;
+
+ 	if ( $type == self::DBOOL ) {
+ 		return $this->toBool();
+ 	}
+ 	if ( $type == self::DSTRING ) {
+ 		return $this->toString();
+ 	}
+ 	if ( $type == self::DFLOAT ) {
+ 		return $this->toFloat();
+ 	}
+ 	if ( $type == self::DINT ) {
+ 		return $this->toInt();
+ 	}
+ 	if ( $type == self::DLIST ) {
+ 		$native = [];
+ 		foreach ( $this->toList() as $element ) {
+ 			$native[] = $element->toNative();
+ 		}
+
+ 		return $native;
+ 	}
+ 	if ( $type == self::DNULL ) {
+ 		return null;
+ 	}
+
+ 	throw new MWException( "Unknown type" );
+ }
+
+ /**
+  * Returns the raw data of this value after casting it to DBOOL.
+  *
+  * @return bool
+  */
+ public function toBool() {
+ 	$cast = self::castTypes( $this, self::DBOOL );
+
+ 	return $cast->data;
+ }
+
+ /**
+  * Returns the raw data of this value after casting it to DSTRING.
+  *
+  * @return string
+  */
+ public function toString() {
+ 	$cast = self::castTypes( $this, self::DSTRING );
+
+ 	return $cast->data;
+ }
+
+ /**
+  * Returns the raw data of this value after casting it to DFLOAT.
+  *
+  * @return float
+  */
+ public function toFloat() {
+ 	$cast = self::castTypes( $this, self::DFLOAT );
+
+ 	return $cast->data;
+ }
+
+ /**
+  * Returns the raw data of this value after casting it to DINT.
+  *
+  * @return int
+  */
+ public function toInt() {
+ 	$cast = self::castTypes( $this, self::DINT );
+
+ 	return $cast->data;
+ }
+
+ /**
+  * Returns the numeric form of this value: integers stay integers, every
+  * other type is converted to a float.
+  *
+  * @return int|float
+  */
+ public function toNumber() {
+ 	if ( $this->type == self::DINT ) {
+ 		return $this->toInt();
+ 	}
+
+ 	return $this->toFloat();
+ }
+
+ /**
+  * Returns the raw data of this value after casting it to DLIST.
+  *
+  * @return AFPData[]
+  */
+ public function toList() {
+ 	$cast = self::castTypes( $this, self::DLIST );
+
+ 	return $cast->data;
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPException.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPException.php
new file mode 100644
index 00000000..51fe4442
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPException.php
@@ -0,0 +1,4 @@
+<?php
+
+/**
+ * Exception type thrown by the AbuseFilter parser classes.
+ * AFPUserVisibleException extends it for errors meant to be shown to users.
+ */
+class AFPException extends MWException {
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPParserState.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPParserState.php
new file mode 100644
index 00000000..7a4f5a73
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPParserState.php
@@ -0,0 +1,10 @@
+<?php
+
+/**
+ * Snapshot of a parser's current token and position, used to roll the
+ * parser back after speculative parsing.
+ */
+class AFPParserState {
+ /** @var mixed Position the parser was at when the snapshot was taken */
+ public $pos;
+ /** @var mixed Token the parser was looking at when the snapshot was taken */
+ public $token;
+
+ /**
+  * @param mixed $token Current token of the parser
+  * @param mixed $pos Current position of the parser
+  */
+ public function __construct( $token, $pos ) {
+ 	$this->pos = $pos;
+ 	$this->token = $token;
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPToken.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPToken.php
new file mode 100644
index 00000000..2f7d9c99
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPToken.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Abuse filter parser.
+ * Copyright © Victor Vasiliev, 2008.
+ * Based on ideas by Andrew Garrett
+ * Distributed under GNU GPL v2 terms.
+ *
+ * Types of token:
+ * * T_NONE - special-purpose token
+ * * T_BRACE - ( or )
+ * * T_COMMA - ,
+ * * T_OP - operator like + or ^
+ * * T_INT, T_FLOAT - integer and floating-point numbers
+ * * T_STRING - string, in "" or ''
+ * * T_KEYWORD - keyword
+ * * T_ID - identifier
+ * * T_STATEMENT_SEPARATOR - ;
+ * * T_SQUARE_BRACKET - [ or ]
+ *
+ * Levels of parsing:
+ * * Entry - catches unexpected characters
+ * * Semicolon - ;
+ * * Set - :=
+ * * Conditionals (IF) - if-then-else-end, cond ? a : b
+ * * BoolOps (BO) - &, |, ^
+ * * CompOps (CO) - ==, !=, ===, !==, >, <, >=, <=
+ * * SumRel (SR) - +, -
+ * * MulRel (MR) - *, /, %
+ * * Pow (P) - **
+ * * BoolNeg (BN) - ! operation
+ * * SpecialOperators (SO) - in and like
+ * * Unarys (U) - plus and minus in cases like -5 or -(2 * +2)
+ * * ListElement (LE) - list[number]
+ * * Braces (B) - ( and )
+ * * Functions (F)
+ * * Atom (A) - return value
+ */
+class AFPToken {
+ // Types of token
+ const TNONE = 'T_NONE';
+ const TID = 'T_ID';
+ const TKEYWORD = 'T_KEYWORD';
+ const TSTRING = 'T_STRING';
+ const TINT = 'T_INT';
+ const TFLOAT = 'T_FLOAT';
+ const TOP = 'T_OP';
+ const TBRACE = 'T_BRACE';
+ const TSQUAREBRACKET = 'T_SQUARE_BRACKET';
+ const TCOMMA = 'T_COMMA';
+ const TSTATEMENTSEPARATOR = 'T_STATEMENT_SEPARATOR';
+
+ public $type;
+ public $value;
+ public $pos;
+
+ /**
+  * @param string $type One of the self::T* token type constants
+  * @param mixed $value Value carried by the token
+  * @param int $pos Position associated with the token
+  */
+ public function __construct( $type = self::TNONE, $value = null, $pos = 0 ) {
+ 	$this->pos = $pos;
+ 	$this->value = $value;
+ 	$this->type = $type;
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php
new file mode 100644
index 00000000..e185616c
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php
@@ -0,0 +1,126 @@
+<?php
+/**
+ * Represents a node of a parser tree.
+ */
+class AFPTreeNode {
+ // Each of the constants below represents a node corresponding to a level
+ // of the parser, from the top of the tree to the bottom.
+
+ // ENTRY is always one-element and thus does not have its own node.
+
+ // SEMICOLON is a many-children node, denoting that the nodes have to be
+ // evaluated in order and the last value has to be returned.
+ const SEMICOLON = 'SEMICOLON';
+
+ // ASSIGNMENT (formerly known as SET) is a node which is responsible for
+ // assigning values to variables. ASSIGNMENT is a (variable name [string],
+ // value [tree node]) tuple, INDEX_ASSIGNMENT (which is used to assign
+ // values at list offsets) is a (variable name [string], index [tree node],
+ // value [tree node]) tuple, and LIST_APPEND has the form of (variable name
+ // [string], value [tree node]).
+ const ASSIGNMENT = 'ASSIGNMENT';
+ const INDEX_ASSIGNMENT = 'INDEX_ASSIGNMENT';
+ const LIST_APPEND = 'LIST_APPEND';
+
+ // CONDITIONAL represents both a ternary operator and an if-then-else-end
+ // construct. The format is (condition, evaluated-if-true,
+ // evaluated-if-false), all tree nodes.
+ const CONDITIONAL = 'CONDITIONAL';
+
+ // LOGIC is a logic operator accepted by AFPData::boolOp. The format is
+ // (operation, left operand, right operand).
+ const LOGIC = 'LOGIC';
+
+ // COMPARE is a comparison operator accepted by AFPData::compareOp. The format is
+ // (operation, left operand, right operand).
+ const COMPARE = 'COMPARE';
+
+ // SUM_REL is either '+' or '-'. The format is (operation, left operand,
+ // right operand).
+ const SUM_REL = 'SUM_REL';
+
+ // MUL_REL is a multiplication-related operation accepted by AFPData::mulRel.
+ // The format is (operation, left operand, right operand).
+ const MUL_REL = 'MUL_REL';
+
+ // POW is an exponentiation operator. The format is (base, exponent).
+ const POW = 'POW';
+
+ // BOOL_INVERT is a boolean inversion operator. The format is (operand).
+ const BOOL_INVERT = 'BOOL_INVERT';
+
+ // KEYWORD_OPERATOR is one of the binary keyword operators supported by the
+ // filter language. The format is (keyword, left operand, right operand).
+ const KEYWORD_OPERATOR = 'KEYWORD_OPERATOR';
+
+ // UNARY is either unary minus or unary plus. The format is (operator,
+ // operand).
+ const UNARY = 'UNARY';
+
+ // LIST_INDEX is an operation of accessing a list by an offset. The format
+ // is (list, offset).
+ const LIST_INDEX = 'LIST_INDEX';
+
+ // Since parenthesis only manipulate precedence of the operators, they are
+ // not explicitly represented in the tree.
+
+ // FUNCTION_CALL is an invocation of built-in function. The format is a
+ // tuple where the first element is a function name, and all subsequent
+ // elements are the arguments.
+ const FUNCTION_CALL = 'FUNCTION_CALL';
+
+ // LIST_DEFINITION is a list literal. The $children field contains tree
+ // nodes for the values of each of the list element used.
+ const LIST_DEFINITION = 'LIST_DEFINITION';
+
+ // ATOM is a node representing a literal. The only element of $children is a
+ // token corresponding to the literal.
+ const ATOM = 'ATOM';
+
+ /** @var string Type of the node, one of the constants above */
+ public $type;
+ /**
+ * Parameters of the value. Typically it is an array of children nodes,
+ * which might be either strings (for parametrization of the node) or another
+ * node. In case of ATOM it's a parser token.
+ * @var AFPTreeNode[]|string[]|AFPToken
+ */
+ public $children;
+
+ // Position used for error reporting.
+ public $position;
+
+ /**
+  * @param string $type Type of the node, one of the class constants
+  * @param AFPTreeNode[]|string[]|AFPToken $children Parameters of the node
+  * @param mixed $position Position used for error reporting
+  */
+ public function __construct( $type, $children, $position ) {
+ 	$this->position = $position;
+ 	$this->children = $children;
+ 	$this->type = $type;
+ }
+
+ /**
+  * Renders this node and its descendants as a multi-line string, one node
+  * or string parameter per line.
+  *
+  * @return string
+  */
+ public function toDebugString() {
+ 	$lines = $this->toDebugStringInner();
+
+ 	return implode( "\n", $lines );
+ }
+
+ /**
+  * Builds the lines of the debug representation. The first line names this
+  * node; the lines of every child follow, each prefixed for indentation.
+  *
+  * @return string[]
+  * @throws AFPException If a child is neither a node nor a string
+  */
+ private function toDebugStringInner() {
+ 	if ( $this->type == self::ATOM ) {
+ 		$token = $this->children;
+ 		return [ "ATOM({$token->type} {$token->value})" ];
+ 	}
+
+ 	$lines = [ (string)$this->type ];
+ 	foreach ( $this->children as $child ) {
+ 		if ( $child instanceof AFPTreeNode ) {
+ 			foreach ( $child->toDebugStringInner() as $childLine ) {
+ 				$lines[] = ' ' . $childLine;
+ 			}
+ 		} elseif ( is_string( $child ) ) {
+ 			$lines[] = " {$child}";
+ 		} else {
+ 			throw new AFPException( "Each node parameter has to be either a node or a string" );
+ 		}
+ 	}
+
+ 	return $lines;
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php
new file mode 100644
index 00000000..345adcb8
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php
@@ -0,0 +1,611 @@
+<?php
+
+/**
+ * A version of the abuse filter parser that separates parsing the filter and
+ * evaluating it into different passes, allowing the parse tree to be cached.
+ *
+ * @file
+ */
+
+/**
+ * A parser that transforms the text of the filter into a parse tree.
+ */
+class AFPTreeParser {
+ // The tokenized representation of the filter parsed.
+ public $mTokens;
+
+ // Current token handled by the parser and its position.
+ public $mCur, $mPos;
+
+ const CACHE_VERSION = 2;
+
+ /**
+  * Create a new instance with an empty token list, positioned at 0
+  * (see resetState()).
+  */
+ public function __construct() {
+ $this->resetState();
+ }
+
+ /**
+  * Clears the token list and moves the parser back to position 0.
+  * The current token ($mCur) is left untouched.
+  */
+ public function resetState() {
+ 	$this->mPos = 0;
+ 	$this->mTokens = [];
+ }
+
+ /**
+  * Advances the parser to the next token in the filter code.
+  *
+  * The entry of $mTokens stored under the current position is a
+  * (token, position) pair; both $mCur and $mPos are replaced from it.
+  */
+ protected function move() {
+ list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos];
+ }
+
+ /**
+  * Takes a snapshot of the current token and position, so that the parser
+  * can later be rolled back several tokens with setState().
+  *
+  * @return AFPParserState
+  */
+ protected function getState() {
+ 	$snapshot = new AFPParserState( $this->mCur, $this->mPos );
+
+ 	return $snapshot;
+ }
+
+ /**
+  * Rolls the parser back to a snapshot previously taken with getState().
+  *
+  * @param AFPParserState $state
+  */
+ protected function setState( AFPParserState $state ) {
+ 	$this->mPos = $state->pos;
+ 	$this->mCur = $state->token;
+ }
+
+ /**
+  * Tokenizes the supplied filter source code and parses it into a tree.
+  *
+  * @param string $code
+  * @throws AFPUserVisibleException
+  * @return AFPTreeNode|null Null when the code contains no statements
+  */
+ public function parse( $code ) {
+ 	$tokens = AbuseFilterTokenizer::tokenize( $code );
+ 	$this->mTokens = $tokens;
+ 	$this->mPos = 0;
+
+ 	return $this->doLevelEntry();
+ }
+
+ /* Levels */
+
+ /**
+  * Top level of the parser: parses the whole filter and rejects anything
+  * left over after the last statement.
+  *
+  * @return AFPTreeNode|null
+  * @throws AFPUserVisibleException If the parser did not stop on a T_NONE token
+  */
+ protected function doLevelEntry() {
+ 	$tree = $this->doLevelSemicolon();
+
+ 	$lastType = $this->mCur->type;
+ 	if ( $lastType != AFPToken::TNONE ) {
+ 		throw new AFPUserVisibleException(
+ 			'unexpectedatend',
+ 			$this->mPos, [ $lastType ]
+ 		);
+ 	}
+
+ 	return $tree;
+ }
+
+ /**
+  * Handles the semicolon operator: parses a sequence of statements
+  * separated by ';'. Empty statements are skipped.
+  *
+  * Note that this level starts by advancing to the next token.
+  *
+  * @return AFPTreeNode|null Null for no statements, the statement itself
+  *  for a single one, a SEMICOLON node otherwise
+  */
+ protected function doLevelSemicolon() {
+ 	$statements = [];
+
+ 	while ( true ) {
+ 		$this->move();
+ 		$position = $this->mPos;
+ 		$type = $this->mCur->type;
+
+ 		if ( $type == AFPToken::TNONE ) {
+ 			break;
+ 		}
+
+ 		// Allow empty statements.
+ 		if ( $type != AFPToken::TSTATEMENTSEPARATOR ) {
+ 			$statements[] = $this->doLevelSet();
+ 			$position = $this->mPos;
+
+ 			if ( $this->mCur->type != AFPToken::TSTATEMENTSEPARATOR ) {
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	// Flatten the tree if possible.
+ 	switch ( count( $statements ) ) {
+ 		case 0:
+ 			return null;
+ 		case 1:
+ 			return $statements[0];
+ 		default:
+ 			return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position );
+ 	}
+ }
+
+ /**
+  * Handles variable assignment: "var := value", "var[index] := value" and
+  * "var[] := value". Anything else is handed to the next level.
+  *
+  * @return AFPTreeNode
+  * @throws AFPUserVisibleException
+  */
+ protected function doLevelSet() {
+ 	if ( $this->mCur->type != AFPToken::TID ) {
+ 		return $this->doLevelConditions();
+ 	}
+
+ 	$varname = $this->mCur->value;
+
+ 	// Speculatively parse the statement as an assignment; the checkpoint
+ 	// lets us roll back if it turns out not to be one.
+ 	$checkpoint = $this->getState();
+ 	$this->move();
+
+ 	if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$value = $this->doLevelSet();
+
+ 		return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
+ 	}
+
+ 	if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ 		$this->move();
+
+ 		$isAppend = $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']';
+ 		$index = null;
+ 		if ( !$isAppend ) {
+ 			// Parse index offset. doLevelSemicolon() advances first, so go
+ 			// back to the opening bracket before calling it.
+ 			$this->setState( $checkpoint );
+ 			$this->move();
+ 			$index = $this->doLevelSemicolon();
+ 			if ( $this->mCur->type != AFPToken::TSQUAREBRACKET || $this->mCur->value != ']' ) {
+ 				throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
+ 					[ ']', $this->mCur->type, $this->mCur->value ] );
+ 			}
+ 		}
+
+ 		$this->move();
+ 		if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ 			$position = $this->mPos;
+ 			$this->move();
+ 			$value = $this->doLevelSet();
+
+ 			if ( $isAppend ) {
+ 				return new AFPTreeNode(
+ 					AFPTreeNode::LIST_APPEND, [ $varname, $value ], $position );
+ 			}
+
+ 			return new AFPTreeNode(
+ 				AFPTreeNode::INDEX_ASSIGNMENT,
+ 				[ $varname, $index, $value ],
+ 				$position
+ 			);
+ 		}
+ 	}
+
+ 	// If we reached this point, we did not find an assignment. Roll back
+ 	// and assume this was just a literal.
+ 	$this->setState( $checkpoint );
+
+ 	return $this->doLevelConditions();
+ }
+
+ /**
+  * Handles the ternary operator (cond ? a : b) and if-then-else-end.
+  * Both forms produce a CONDITIONAL node.
+  *
+  * @return AFPTreeNode
+  * @throws AFPUserVisibleException If an expected keyword or ':' is missing
+  */
+ protected function doLevelConditions() {
+ 	// Requires the current token to have the given type and value, then
+ 	// advances past it.
+ 	$expect = function ( $type, $value ) {
+ 		if ( $this->mCur->type != $type || $this->mCur->value != $value ) {
+ 			throw new AFPUserVisibleException( 'expectednotfound',
+ 				$this->mPos,
+ 				[
+ 					$value,
+ 					$this->mCur->type,
+ 					$this->mCur->value
+ 				]
+ 			);
+ 		}
+ 		$this->move();
+ 	};
+
+ 	if ( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'if' ) {
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$condition = $this->doLevelBoolOps();
+
+ 		$expect( AFPToken::TKEYWORD, 'then' );
+ 		$valueIfTrue = $this->doLevelConditions();
+
+ 		$expect( AFPToken::TKEYWORD, 'else' );
+ 		$valueIfFalse = $this->doLevelConditions();
+
+ 		$expect( AFPToken::TKEYWORD, 'end' );
+
+ 		return new AFPTreeNode(
+ 			AFPTreeNode::CONDITIONAL,
+ 			[ $condition, $valueIfTrue, $valueIfFalse ],
+ 			$position
+ 		);
+ 	}
+
+ 	$condition = $this->doLevelBoolOps();
+ 	if ( $this->mCur->type != AFPToken::TOP || $this->mCur->value != '?' ) {
+ 		return $condition;
+ 	}
+
+ 	$position = $this->mPos;
+ 	$this->move();
+ 	$valueIfTrue = $this->doLevelConditions();
+
+ 	$expect( AFPToken::TOP, ':' );
+ 	$valueIfFalse = $this->doLevelConditions();
+
+ 	return new AFPTreeNode(
+ 		AFPTreeNode::CONDITIONAL,
+ 		[ $condition, $valueIfTrue, $valueIfFalse ],
+ 		$position
+ 	);
+ }
+
+ /**
+  * Handles the logic operators '&', '|' and '^', building a left-associative
+  * chain of LOGIC nodes.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelBoolOps() {
+ 	$node = $this->doLevelCompares();
+
+ 	while ( true ) {
+ 		$token = $this->mCur;
+ 		if ( $token->type != AFPToken::TOP || !in_array( $token->value, [ '&', '|', '^' ] ) ) {
+ 			break;
+ 		}
+
+ 		$operator = $token->value;
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$right = $this->doLevelCompares();
+
+ 		$node = new AFPTreeNode( AFPTreeNode::LOGIC, [ $operator, $node, $right ], $position );
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles the comparison operators, building a left-associative chain of
+  * COMPARE nodes.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelCompares() {
+ 	$comparisons = [ '==', '===', '!=', '!==', '<', '>', '<=', '>=', '=' ];
+ 	$node = $this->doLevelSumRels();
+
+ 	while ( true ) {
+ 		$token = $this->mCur;
+ 		if ( $token->type != AFPToken::TOP || !in_array( $token->value, $comparisons ) ) {
+ 			break;
+ 		}
+
+ 		$operator = $token->value;
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$right = $this->doLevelSumRels();
+
+ 		$node = new AFPTreeNode( AFPTreeNode::COMPARE, [ $operator, $node, $right ], $position );
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles binary '+' and '-', building a left-associative chain of
+  * SUM_REL nodes.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelSumRels() {
+ 	$node = $this->doLevelMulRels();
+
+ 	while ( true ) {
+ 		$token = $this->mCur;
+ 		if ( $token->type != AFPToken::TOP || !in_array( $token->value, [ '+', '-' ] ) ) {
+ 			break;
+ 		}
+
+ 		$operator = $token->value;
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$right = $this->doLevelMulRels();
+
+ 		$node = new AFPTreeNode( AFPTreeNode::SUM_REL, [ $operator, $node, $right ], $position );
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles '*', '/' and '%', building a left-associative chain of MUL_REL
+  * nodes.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelMulRels() {
+ 	$node = $this->doLevelPow();
+
+ 	while ( true ) {
+ 		$token = $this->mCur;
+ 		if ( $token->type != AFPToken::TOP || !in_array( $token->value, [ '*', '/', '%' ] ) ) {
+ 			break;
+ 		}
+
+ 		$operator = $token->value;
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$right = $this->doLevelPow();
+
+ 		$node = new AFPTreeNode( AFPTreeNode::MUL_REL, [ $operator, $node, $right ], $position );
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles exponentiation ('**'), building a left-associative chain of POW
+  * nodes.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelPow() {
+ 	$node = $this->doLevelBoolInvert();
+
+ 	while ( true ) {
+ 		if ( $this->mCur->type != AFPToken::TOP || $this->mCur->value != '**' ) {
+ 			break;
+ 		}
+
+ 		$position = $this->mPos;
+ 		$this->move();
+ 		$exponent = $this->doLevelBoolInvert();
+
+ 		$node = new AFPTreeNode( AFPTreeNode::POW, [ $node, $exponent ], $position );
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles boolean inversion ('!'). A single leading '!' wraps the operand
+  * in a BOOL_INVERT node.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelBoolInvert() {
+ 	$isNegation = $this->mCur->type == AFPToken::TOP && $this->mCur->value == '!';
+ 	if ( !$isNegation ) {
+ 		return $this->doLevelKeywordOperators();
+ 	}
+
+ 	$position = $this->mPos;
+ 	$this->move();
+ 	$operand = $this->doLevelKeywordOperators();
+
+ 	return new AFPTreeNode( AFPTreeNode::BOOL_INVERT, [ $operand ], $position );
+ }
+
+ /**
+  * Handles the binary keyword operators, i.e. the keywords listed as keys
+  * of AbuseFilterParser::$mKeywords. The keyword is lower-cased before it is
+  * looked up and stored in the node.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelKeywordOperators() {
+ 	$left = $this->doLevelUnarys();
+
+ 	$keyword = strtolower( $this->mCur->value );
+ 	$knownKeywords = array_keys( AbuseFilterParser::$mKeywords );
+ 	if ( $this->mCur->type != AFPToken::TKEYWORD || !in_array( $keyword, $knownKeywords ) ) {
+ 		return $left;
+ 	}
+
+ 	$position = $this->mPos;
+ 	$this->move();
+ 	$right = $this->doLevelUnarys();
+
+ 	return new AFPTreeNode(
+ 		AFPTreeNode::KEYWORD_OPERATOR,
+ 		[ $keyword, $left, $right ],
+ 		$position
+ 	);
+ }
+
+ /**
+  * Handles the unary operators '+' and '-'. A single leading sign wraps the
+  * operand in a UNARY node.
+  *
+  * @return AFPTreeNode
+  */
+ protected function doLevelUnarys() {
+ 	$sign = $this->mCur->value;
+ 	$isSign = $this->mCur->type == AFPToken::TOP && ( $sign == "+" || $sign == "-" );
+ 	if ( !$isSign ) {
+ 		return $this->doLevelListElements();
+ 	}
+
+ 	$position = $this->mPos;
+ 	$this->move();
+ 	$operand = $this->doLevelListElements();
+
+ 	return new AFPTreeNode( AFPTreeNode::UNARY, [ $sign, $operand ], $position );
+ }
+
+ /**
+  * Handles accessing a list element by an offset ("list[offset]"). Repeated
+  * index accesses nest into a chain of LIST_INDEX nodes.
+  *
+  * @return AFPTreeNode
+  * @throws AFPUserVisibleException If the closing ']' is missing
+  */
+ protected function doLevelListElements() {
+ 	$node = $this->doLevelParenthesis();
+
+ 	while ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ 		$position = $this->mPos;
+ 		// doLevelSemicolon() steps past the opening bracket by itself.
+ 		$offset = $this->doLevelSemicolon();
+
+ 		if ( $this->mCur->type != AFPToken::TSQUAREBRACKET || $this->mCur->value != ']' ) {
+ 			throw new AFPUserVisibleException( 'expectednotfound', $this->mPos,
+ 				[ ']', $this->mCur->type, $this->mCur->value ] );
+ 		}
+
+ 		$node = new AFPTreeNode( AFPTreeNode::LIST_INDEX, [ $node, $offset ], $position );
+ 		$this->move();
+ 	}
+
+ 	return $node;
+ }
+
+ /**
+  * Handles parenthesis. The parenthesised expression is returned directly;
+  * no dedicated node is created for the parentheses.
+  *
+  * @return AFPTreeNode
+  * @throws AFPUserVisibleException If the closing ')' is missing
+  */
+ protected function doLevelParenthesis() {
+ 	if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != '(' ) {
+ 		return $this->doLevelFunction();
+ 	}
+
+ 	// doLevelSemicolon() steps past the opening parenthesis by itself.
+ 	$inner = $this->doLevelSemicolon();
+
+ 	if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != ')' ) {
+ 		throw new AFPUserVisibleException(
+ 			'expectednotfound',
+ 			$this->mPos,
+ 			[ ')', $this->mCur->type, $this->mCur->value ]
+ 		);
+ 	}
+ 	$this->move();
+
+ 	return $inner;
+ }
+
+ /**
+  * Handles function calls. An identifier is treated as a function when it
+  * is a key of AbuseFilterParser::$mFunctions. The resulting FUNCTION_CALL
+  * node holds the function name followed by the argument nodes.
+  *
+  * @return AFPTreeNode
+  * @throws AFPUserVisibleException If '(' or ')' is missing
+  */
+ protected function doLevelFunction() {
+ 	$isFunction = $this->mCur->type == AFPToken::TID &&
+ 		isset( AbuseFilterParser::$mFunctions[$this->mCur->value] );
+ 	if ( !$isFunction ) {
+ 		return $this->doLevelAtom();
+ 	}
+
+ 	$name = $this->mCur->value;
+ 	$position = $this->mPos;
+ 	$this->move();
+
+ 	if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != '(' ) {
+ 		throw new AFPUserVisibleException( 'expectednotfound',
+ 			$this->mPos,
+ 			[
+ 				'(',
+ 				$this->mCur->type,
+ 				$this->mCur->value
+ 			]
+ 		);
+ 	}
+
+ 	// Each argument is parsed as a full statement sequence; the parser
+ 	// stops on the ',' or ')' that follows it.
+ 	$arguments = [];
+ 	do {
+ 		$arguments[] = $this->doLevelSemicolon();
+ 	} while ( $this->mCur->type == AFPToken::TCOMMA );
+
+ 	if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != ')' ) {
+ 		throw new AFPUserVisibleException( 'expectednotfound',
+ 			$this->mPos,
+ 			[
+ 				')',
+ 				$this->mCur->type,
+ 				$this->mCur->value
+ 			]
+ 		);
+ 	}
+ 	$this->move();
+
+ 	$children = array_merge( [ $name ], $arguments );
+
+ 	return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, $children, $position );
+ }
+
+ /**
+  * Handles literals: identifiers, strings, numbers, the keywords true,
+  * false and null, and list literals in square brackets.
+  *
+  * @return AFPTreeNode An ATOM or LIST_DEFINITION node
+  * @throws AFPUserVisibleException On an unknown keyword or unexpected token
+  */
+ protected function doLevelAtom() {
+ 	$token = $this->mCur;
+ 	$literalTypes = [ AFPToken::TID, AFPToken::TSTRING, AFPToken::TFLOAT, AFPToken::TINT ];
+
+ 	if ( in_array( $token->type, $literalTypes ) ) {
+ 		$result = new AFPTreeNode( AFPTreeNode::ATOM, $token, $this->mPos );
+ 	} elseif ( $token->type == AFPToken::TKEYWORD ) {
+ 		if ( !in_array( $token->value, [ "true", "false", "null" ] ) ) {
+ 			throw new AFPUserVisibleException(
+ 				'unrecognisedkeyword',
+ 				$this->mPos,
+ 				[ $token->value ]
+ 			);
+ 		}
+ 		$result = new AFPTreeNode( AFPTreeNode::ATOM, $token, $this->mPos );
+ 	} elseif ( $token->type == AFPToken::TSQUAREBRACKET && $token->value == '[' ) {
+ 		$elements = [];
+ 		while ( true ) {
+ 			$this->move();
+ 			// Checked before parsing an element, so "[]" and a trailing
+ 			// comma are both accepted.
+ 			if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ 				break;
+ 			}
+
+ 			$elements[] = $this->doLevelSet();
+
+ 			if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ 				break;
+ 			}
+ 			if ( $this->mCur->type != AFPToken::TCOMMA ) {
+ 				throw new AFPUserVisibleException(
+ 					'expectednotfound',
+ 					$this->mPos,
+ 					[ ', or ]', $this->mCur->type, $this->mCur->value ]
+ 				);
+ 			}
+ 		}
+
+ 		$result = new AFPTreeNode( AFPTreeNode::LIST_DEFINITION, $elements, $this->mPos );
+ 	} else {
+ 		throw new AFPUserVisibleException(
+ 			'unexpectedtoken',
+ 			$this->mPos,
+ 			[
+ 				$this->mCur->type,
+ 				$this->mCur->value
+ 			]
+ 		);
+ 	}
+
+ 	$this->move();
+
+ 	return $result;
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php
new file mode 100644
index 00000000..b6e89d03
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Exceptions that we might conceivably want to report to ordinary users
+ * (i.e. exceptions that don't represent bugs in the extension itself).
+ */
+class AFPUserVisibleException extends AFPException {
+	/**
+	 * @var string Suffix appended to 'abusefilter-exception-' to build the message key.
+	 *   This is the property the class itself reads; it used to be created
+	 *   dynamically because only the differently-cased $mExceptionId was declared.
+	 */
+	public $mExceptionID;
+	/**
+	 * @var string Same value as $mExceptionID. Kept because this spelling was the
+	 *   one originally declared (but never assigned).
+	 */
+	public $mExceptionId;
+	/** @var int Position passed to the constructor; first parameter of the message */
+	public $mPosition;
+	/** @var array Additional message parameters */
+	public $mParams;
+
+	/**
+	 * @param string $exception_id
+	 * @param int $position
+	 * @param array $params
+	 */
+	public function __construct( $exception_id, $position, $params ) {
+		$this->mExceptionID = $exception_id;
+		$this->mExceptionId = $exception_id;
+		$this->mPosition = $position;
+		$this->mParams = $params;
+
+		// Exception message text for logs should be in English.
+		$msg = $this->getMessageObj()->inLanguage( 'en' )->useDatabase( false )->text();
+		parent::__construct( $msg );
+	}
+
+	/**
+	 * Build the message describing this exception.
+	 *
+	 * @return mixed Whatever wfMessage() returns for the key
+	 *   'abusefilter-exception-<id>', with the position followed by $mParams
+	 *   as parameters.
+	 */
+	public function getMessageObj() {
+		// Give grep a chance to find the usages:
+		// abusefilter-exception-unexpectedatend, abusefilter-exception-expectednotfound
+		// abusefilter-exception-unrecognisedkeyword, abusefilter-exception-unexpectedtoken
+		// abusefilter-exception-unclosedstring, abusefilter-exception-invalidoperator
+		// abusefilter-exception-unrecognisedtoken, abusefilter-exception-noparams
+		// abusefilter-exception-dividebyzero, abusefilter-exception-unrecognisedvar
+		// abusefilter-exception-notenoughargs, abusefilter-exception-regexfailure
+		// abusefilter-exception-overridebuiltin, abusefilter-exception-outofbounds
+		// abusefilter-exception-notlist, abusefilter-exception-unclosedcomment
+		return wfMessage(
+			'abusefilter-exception-' . $this->mExceptionID,
+			array_merge( [ $this->mPosition ], $this->mParams )
+		);
+	}
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
new file mode 100644
index 00000000..37384356
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
@@ -0,0 +1,279 @@
+<?php
+/**
+ * AbuseFilterCachingParser is the version of AbuseFilterParser which parses
+ * the code into an abstract syntax tree before evaluating it, and caches that
+ * tree.
+ *
+ * It currently inherits AbuseFilterParser in order to avoid code duplication.
+ * In future, this code will replace current AbuseFilterParser entirely.
+ */
+class AbuseFilterCachingParser extends AbuseFilterParser {
+	/**
+	 * Return the generated version of the parser for cache invalidation
+	 * purposes. Automatically tracks list of all functions and invalidates the
+	 * cache if it is changed.
+	 * @return string SHA-256 hex digest, computed once per request
+	 */
+	public static function getCacheVersion() {
+		static $version = null;
+		if ( $version === null ) {
+			$versionKey = [
+				AFPTreeParser::CACHE_VERSION,
+				AbuseFilterTokenizer::CACHE_VERSION,
+				array_keys( AbuseFilterParser::$mFunctions ),
+				array_keys( AbuseFilterParser::$mKeywords ),
+			];
+			$version = hash( 'sha256', serialize( $versionKey ) );
+		}
+
+		return $version;
+	}
+
+	/**
+	 * Reset the evaluation state: a fresh variable holder and an empty current
+	 * token (evalNode() writes the node position into it for error reporting).
+	 */
+	public function resetState() {
+		$this->mVars = new AbuseFilterVariableHolder;
+		$this->mCur = new AFPToken();
+	}
+
+	/**
+	 * Evaluate filter code, reusing a cached syntax tree when one exists.
+	 *
+	 * @param string $code
+	 * @return AFPData Value of the code; a null AFPData when parsing yields
+	 *   an empty (falsy) tree.
+	 */
+	public function intEval( $code ) {
+		static $cache = null;
+		if ( !$cache ) {
+			$cache = ObjectCache::getLocalServerInstance( 'hash' );
+		}
+
+		$tree = $cache->getWithSetCallback(
+			$cache->makeGlobalKey(
+				__CLASS__,
+				self::getCacheVersion(),
+				hash( 'sha256', $code )
+			),
+			$cache::TTL_DAY,
+			function () use ( $code ) {
+				$parser = new AFPTreeParser();
+				return $parser->parse( $code ) ?: false;
+			}
+		);
+
+		return $tree
+			? $this->evalNode( $tree )
+			: new AFPData( AFPData::DNULL, null );
+	}
+
+	/**
+	 * Evaluate the value of the specified AST node.
+	 *
+	 * @param AFPTreeNode $node The node to evaluate.
+	 * @return AFPData
+	 * @throws AFPException
+	 * @throws AFPUserVisibleException
+	 * @throws MWException
+	 */
+	public function evalNode( AFPTreeNode $node ) {
+		// A lot of AbuseFilterParser features rely on $this->mCur->pos or
+		// $this->mPos for error reporting.
+		// FIXME: this is a hack which needs to be removed when the parsers are
+		// merged.
+		$this->mPos = $node->position;
+		$this->mCur->pos = $node->position;
+
+		switch ( $node->type ) {
+			case AFPTreeNode::ATOM:
+				$tok = $node->children;
+				switch ( $tok->type ) {
+					case AFPToken::TID:
+						return $this->getVarValue( strtolower( $tok->value ) );
+					case AFPToken::TSTRING:
+						return new AFPData( AFPData::DSTRING, $tok->value );
+					case AFPToken::TFLOAT:
+						return new AFPData( AFPData::DFLOAT, $tok->value );
+					case AFPToken::TINT:
+						return new AFPData( AFPData::DINT, $tok->value );
+					/** @noinspection PhpMissingBreakStatementInspection */
+					case AFPToken::TKEYWORD:
+						switch ( $tok->value ) {
+							case "true":
+								return new AFPData( AFPData::DBOOL, true );
+							case "false":
+								return new AFPData( AFPData::DBOOL, false );
+							case "null":
+								return new AFPData();
+						}
+					// Fallthrough intended
+					default:
+						throw new AFPException( "Unknown token provided in the ATOM node" );
+				}
+			case AFPTreeNode::LIST_DEFINITION:
+				$items = array_map( [ $this, 'evalNode' ], $node->children );
+				return new AFPData( AFPData::DLIST, $items );
+
+			case AFPTreeNode::FUNCTION_CALL:
+				// children[0] identifies the function, the rest are argument nodes.
+				$functionName = $node->children[0];
+				$args = array_slice( $node->children, 1 );
+
+				$func = self::$mFunctions[$functionName];
+				$dataArgs = array_map( [ $this, 'evalNode' ], $args );
+
+				/** @noinspection PhpToStringImplementationInspection */
+				$funcHash = md5( $func . serialize( $dataArgs ) );
+
+				// Functions listed in $ActiveFunctions are never served from the cache.
+				if ( isset( self::$funcCache[$funcHash] ) &&
+					!in_array( $func, self::$ActiveFunctions )
+				) {
+					$result = self::$funcCache[$funcHash];
+				} else {
+					AbuseFilter::triggerLimiter();
+					$result = self::$funcCache[$funcHash] = $this->$func( $dataArgs );
+				}
+
+				// Keep the per-request function cache bounded.
+				if ( count( self::$funcCache ) > 1000 ) {
+					self::$funcCache = [];
+				}
+
+				return $result;
+
+			case AFPTreeNode::LIST_INDEX:
+				list( $list, $offset ) = $node->children;
+
+				$list = $this->evalNode( $list );
+				if ( $list->type != AFPData::DLIST ) {
+					throw new AFPUserVisibleException( 'notlist', $node->position, [] );
+				}
+
+				$offset = $this->evalNode( $offset )->toInt();
+
+				$list = $list->toList();
+				if ( count( $list ) <= $offset ) {
+					throw new AFPUserVisibleException( 'outofbounds', $node->position,
+						[ $offset, count( $list ) ] );
+				}
+
+				return $list[$offset];
+
+			case AFPTreeNode::UNARY:
+				list( $operation, $argument ) = $node->children;
+				$argument = $this->evalNode( $argument );
+				if ( $operation == '-' ) {
+					return AFPData::unaryMinus( $argument );
+				}
+				return $argument;
+
+			case AFPTreeNode::KEYWORD_OPERATOR:
+				list( $keyword, $leftOperand, $rightOperand ) = $node->children;
+				$func = self::$mKeywords[$keyword];
+				$leftOperand = $this->evalNode( $leftOperand );
+				$rightOperand = $this->evalNode( $rightOperand );
+
+				AbuseFilter::triggerLimiter();
+				$result = AFPData::$func( $leftOperand, $rightOperand, $node->position );
+
+				return $result;
+			case AFPTreeNode::BOOL_INVERT:
+				list( $argument ) = $node->children;
+				$argument = $this->evalNode( $argument );
+				return AFPData::boolInvert( $argument );
+
+			case AFPTreeNode::POW:
+				list( $base, $exponent ) = $node->children;
+				$base = $this->evalNode( $base );
+				$exponent = $this->evalNode( $exponent );
+				return AFPData::pow( $base, $exponent );
+
+			case AFPTreeNode::MUL_REL:
+				list( $op, $leftOperand, $rightOperand ) = $node->children;
+				$leftOperand = $this->evalNode( $leftOperand );
+				$rightOperand = $this->evalNode( $rightOperand );
+				// Pass the node position (previously a hard-coded 0) as the
+				// position argument of mulRel().
+				return AFPData::mulRel( $leftOperand, $rightOperand, $op, $node->position );
+
+			case AFPTreeNode::SUM_REL:
+				list( $op, $leftOperand, $rightOperand ) = $node->children;
+				$leftOperand = $this->evalNode( $leftOperand );
+				$rightOperand = $this->evalNode( $rightOperand );
+				switch ( $op ) {
+					case '+':
+						return AFPData::sum( $leftOperand, $rightOperand );
+					case '-':
+						return AFPData::sub( $leftOperand, $rightOperand );
+					default:
+						throw new AFPException( "Unknown sum-related operator: {$op}" );
+				}
+
+			case AFPTreeNode::COMPARE:
+				list( $op, $leftOperand, $rightOperand ) = $node->children;
+				$leftOperand = $this->evalNode( $leftOperand );
+				$rightOperand = $this->evalNode( $rightOperand );
+				AbuseFilter::triggerLimiter();
+				return AFPData::compareOp( $leftOperand, $rightOperand, $op );
+
+			case AFPTreeNode::LOGIC:
+				list( $op, $leftOperand, $rightOperand ) = $node->children;
+				$leftOperand = $this->evalNode( $leftOperand );
+				$value = $leftOperand->toBool();
+				// Short-circuit.
+				if ( ( !$value && $op == '&' ) || ( $value && $op == '|' ) ) {
+					return $leftOperand;
+				}
+				$rightOperand = $this->evalNode( $rightOperand );
+				return AFPData::boolOp( $leftOperand, $rightOperand, $op );
+
+			case AFPTreeNode::CONDITIONAL:
+				list( $condition, $valueIfTrue, $valueIfFalse ) = $node->children;
+				$condition = $this->evalNode( $condition );
+				// Only the selected branch is evaluated.
+				if ( $condition->toBool() ) {
+					return $this->evalNode( $valueIfTrue );
+				} else {
+					return $this->evalNode( $valueIfFalse );
+				}
+
+			case AFPTreeNode::ASSIGNMENT:
+				list( $varName, $value ) = $node->children;
+				$value = $this->evalNode( $value );
+				$this->setUserVariable( $varName, $value );
+				return $value;
+
+			case AFPTreeNode::INDEX_ASSIGNMENT:
+				list( $varName, $offset, $value ) = $node->children;
+
+				$list = $this->mVars->getVar( $varName );
+				if ( $list->type != AFPData::DLIST ) {
+					throw new AFPUserVisibleException( 'notlist', $node->position, [] );
+				}
+
+				$offset = $this->evalNode( $offset )->toInt();
+
+				$list = $list->toList();
+				if ( count( $list ) <= $offset ) {
+					throw new AFPUserVisibleException( 'outofbounds', $node->position,
+						[ $offset, count( $list ) ] );
+				}
+
+				// Return the evaluated AFPData, not the unevaluated tree node.
+				$value = $this->evalNode( $value );
+				$list[$offset] = $value;
+				$this->setUserVariable( $varName, new AFPData( AFPData::DLIST, $list ) );
+				return $value;
+
+			case AFPTreeNode::LIST_APPEND:
+				list( $varName, $value ) = $node->children;
+
+				$list = $this->mVars->getVar( $varName );
+				if ( $list->type != AFPData::DLIST ) {
+					throw new AFPUserVisibleException( 'notlist', $node->position, [] );
+				}
+
+				$list = $list->toList();
+				// Return the evaluated AFPData, not the unevaluated tree node.
+				$value = $this->evalNode( $value );
+				$list[] = $value;
+				$this->setUserVariable( $varName, new AFPData( AFPData::DLIST, $list ) );
+				return $value;
+
+			case AFPTreeNode::SEMICOLON:
+				// Start from a null AFPData so that an empty statement list
+				// still honours the AFPData return type.
+				$lastValue = new AFPData();
+				foreach ( $node->children as $statement ) {
+					$lastValue = $this->evalNode( $statement );
+				}
+
+				return $lastValue;
+			default:
+				throw new AFPException( "Unknown node type passed: {$node->type}" );
+		}
+	}
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php
new file mode 100644
index 00000000..50f8dddc
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php
@@ -0,0 +1,1560 @@
+<?php
+
+use Wikimedia\Equivset\Equivset;
+
+class AbuseFilterParser {
+ public $mCode, $mTokens, $mPos, $mCur, $mShortCircuit, $mAllowShort, $mLen;
+
+ /**
+ * @var AbuseFilterVariableHolder
+ */
+ public $mVars;
+
+ // Maps function names usable in filter code (lcase, ucase, length, ccnorm, ...) to the names of the parser methods implementing them.
+ public static $mFunctions = [
+ 'lcase' => 'funcLc',
+ 'ucase' => 'funcUc',
+ 'length' => 'funcLen',
+ 'string' => 'castString',
+ 'int' => 'castInt',
+ 'float' => 'castFloat',
+ 'bool' => 'castBool',
+ 'norm' => 'funcNorm',
+ 'ccnorm' => 'funcCCNorm',
+ 'ccnorm_contains_any' => 'funcCCNormContainsAny',
+ 'ccnorm_contains_all' => 'funcCCNormContainsAll',
+ 'specialratio' => 'funcSpecialRatio',
+ 'rmspecials' => 'funcRMSpecials',
+ 'rmdoubles' => 'funcRMDoubles',
+ 'rmwhitespace' => 'funcRMWhitespace',
+ 'count' => 'funcCount',
+ 'rcount' => 'funcRCount',
+ 'get_matches' => 'funcGetMatches',
+ 'ip_in_range' => 'funcIPInRange',
+ 'contains_any' => 'funcContainsAny',
+ 'contains_all' => 'funcContainsAll',
+ 'substr' => 'funcSubstr',
+ 'strlen' => 'funcLen',
+ 'strpos' => 'funcStrPos',
+ 'str_replace' => 'funcStrReplace',
+ 'rescape' => 'funcStrRegexEscape',
+ 'set' => 'funcSetVar',
+ 'set_var' => 'funcSetVar',
+ ];
+
+ // Functions that affect parser state, and shouldn't be cached.
+ public static $ActiveFunctions = [
+ 'funcSetVar',
+ ];
+
+ public static $mKeywords = [
+ 'in' => 'keywordIn',
+ 'like' => 'keywordLike',
+ 'matches' => 'keywordLike',
+ 'contains' => 'keywordContains',
+ 'rlike' => 'keywordRegex',
+ 'irlike' => 'keywordRegexInsensitive',
+ 'regex' => 'keywordRegex',
+ ];
+
+ public static $funcCache = [];
+
+ /**
+ * @var Equivset
+ */
+ protected static $equivset;
+
+ /**
+  * Create a new parser instance.
+  *
+  * @param AbuseFilterVariableHolder|null $vars Variable holder to evaluate
+  *   against. Any other value is ignored and the holder set up by
+  *   resetState() is kept.
+  */
+ public function __construct( $vars = null ) {
+ 	$this->resetState();
+ 	if ( !( $vars instanceof AbuseFilterVariableHolder ) ) {
+ 		return;
+ 	}
+ 	$this->mVars = $vars;
+ }
+
+ /**
+  * Restore the parser to its initial state: no code, no tokens, position 0,
+  * an empty variable holder, short-circuiting allowed but not active.
+  */
+ public function resetState() {
+ 	$this->mVars = new AbuseFilterVariableHolder;
+
+ 	$this->mCode = '';
+ 	$this->mTokens = [];
+ 	$this->mPos = 0;
+
+ 	$this->mAllowShort = true;
+ 	$this->mShortCircuit = false;
+ }
+
+ /**
+  * Check a filter for syntax errors by parsing it with short-circuiting
+  * disabled. The previous mAllowShort setting is restored afterwards.
+  *
+  * @param string $filter
+  * @return array|bool True when parsing raised no AFPUserVisibleException,
+  *   otherwise [ error message text, position of the error ].
+  */
+ public function checkSyntax( $filter ) {
+ 	$origAS = $this->mAllowShort;
+ 	$this->mAllowShort = false;
+
+ 	$outcome = true;
+ 	try {
+ 		$this->parse( $filter );
+ 	} catch ( AFPUserVisibleException $excep ) {
+ 		$outcome = [ $excep->getMessageObj()->text(), $excep->mPosition ];
+ 	}
+
+ 	$this->mAllowShort = $origAS;
+
+ 	return $outcome;
+ }
+
+ /**
+  * Set a single variable on this parser's variable holder.
+  *
+  * @param string $name
+  * @param mixed $value
+  */
+ public function setVar( $name, $value ) {
+ 	$holder = $this->mVars;
+ 	$holder->setVar( $name, $value );
+ }
+
+ /**
+  * Add several variables at once.
+  *
+  * @param mixed $vars Either an array of name => value pairs, each passed to
+  *   setVar(), or an AbuseFilterVariableHolder handed to addHolders() on the
+  *   current holder. Anything else is silently ignored.
+  */
+ public function setVars( $vars ) {
+ 	if ( $vars instanceof AbuseFilterVariableHolder ) {
+ 		$this->mVars->addHolders( $vars );
+ 		return;
+ 	}
+
+ 	if ( !is_array( $vars ) ) {
+ 		return;
+ 	}
+
+ 	foreach ( $vars as $name => $var ) {
+ 		$this->setVar( $name, $var );
+ 	}
+ }
+
+ /**
+  * Advance to the next token. The token table entry at the current position
+  * is a pair: the token to make current and the position to continue from.
+  * Nothing is returned; mCur and mPos are updated in place.
+  */
+ protected function move() {
+ 	list( $nextToken, $nextPos ) = $this->mTokens[$this->mPos];
+ 	$this->mCur = $nextToken;
+ 	$this->mPos = $nextPos;
+ }
+
+ /**
+  * Capture the current token and position so that the parser can later be
+  * rolled back several tokens with setState().
+  *
+  * @return AFPParserState
+  */
+ protected function getState() {
+ 	$snapshot = new AFPParserState( $this->mCur, $this->mPos );
+ 	return $snapshot;
+ }
+
+ /**
+  * Roll the parser back to a state previously captured with getState().
+  *
+  * @param AFPParserState $state
+  */
+ protected function setState( AFPParserState $state ) {
+ 	$this->mPos = $state->pos;
+ 	$this->mCur = $state->token;
+ }
+
+ /**
+  * While short-circuiting, skip from the current '(' to its matching ')'
+  * without evaluating anything in between. Does nothing unless the current
+  * token is '(' and short-circuiting is active. On return the current token
+  * is the matching ')'.
+  *
+  * @throws AFPUserVisibleException 'expectednotfound' when the tokens run out
+  *   before the matching ')' is found.
+  */
+ protected function skipOverBraces() {
+ 	$atOpeningBrace = $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(';
+ 	if ( !$atOpeningBrace || !$this->mShortCircuit ) {
+ 		return;
+ 	}
+
+ 	// Nesting depth; starts at 1 for the brace we are standing on.
+ 	$depth = 1;
+ 	while ( $this->mCur->type != AFPToken::TNONE && $depth > 0 ) {
+ 		$this->move();
+ 		if ( $this->mCur->type != AFPToken::TBRACE ) {
+ 			continue;
+ 		}
+ 		if ( $this->mCur->value == '(' ) {
+ 			$depth++;
+ 		} elseif ( $this->mCur->value == ')' ) {
+ 			$depth--;
+ 		}
+ 	}
+
+ 	$atClosingBrace = $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')';
+ 	if ( !$atClosingBrace ) {
+ 		throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, [ ')' ] );
+ 	}
+ }
+
+ /**
+  * Evaluate filter code and reduce its value to a boolean.
+  *
+  * @param string $code
+  * @return bool
+  */
+ public function parse( $code ) {
+ 	$value = $this->intEval( $code );
+ 	return $value->toBool();
+ }
+
+ /**
+  * Evaluate filter code and return its value converted to a string.
+  *
+  * @param string $filter
+  * @return string
+  */
+ public function evaluateExpression( $filter ) {
+ 	$value = $this->intEval( $filter );
+ 	return $value->toString();
+ }
+
+ /**
+  * Tokenize the given code, reset the per-run parser fields and evaluate it
+  * starting from the entry level.
+  *
+  * @param string $code
+  * @return AFPData Value the code evaluated to
+  */
+ public function intEval( $code ) {
+ 	// Setup, resetting
+ 	$this->mCode = $code;
+ 	$this->mTokens = AbuseFilterTokenizer::tokenize( $code );
+ 	$this->mLen = strlen( $code );
+ 	$this->mShortCircuit = false;
+ 	$this->mPos = 0;
+
+ 	$value = new AFPData();
+ 	$this->doLevelEntry( $value );
+
+ 	return $value;
+ }
+
+ /**
+  * Comparison callback ordering two strings by their byte length.
+  *
+  * @param string $a
+  * @param string $b
+  * @return int -1 when $a is shorter than $b, 1 when it is longer, 0 when
+  *   both have the same length.
+  */
+ static function lengthCompare( $a, $b ) {
+ 	$lenA = strlen( $a );
+ 	$lenB = strlen( $b );
+
+ 	if ( $lenA < $lenB ) {
+ 		return -1;
+ 	}
+
+ 	return ( $lenA == $lenB ) ? 0 : 1;
+ }
+
+ /* Levels */
+
+ /**
+  * Entry level: evaluate the whole code and reject anything left over after
+  * the last expression.
+  *
+  * @param AFPData &$result Receives the value of the code
+  * @throws AFPUserVisibleException 'unexpectedatend' when evaluation stops
+  *   before the end-of-input token.
+  */
+ protected function doLevelEntry( &$result ) {
+ 	$this->doLevelSemicolon( $result );
+
+ 	if ( $this->mCur->type == AFPToken::TNONE ) {
+ 		return;
+ 	}
+
+ 	throw new AFPUserVisibleException(
+ 		'unexpectedatend',
+ 		$this->mCur->pos, [ $this->mCur->type ]
+ 	);
+ }
+
+ /**
+  * Handles a sequence of statements separated by statement separators.
+  * Always begins by advancing one token, so callers invoke it while standing
+  * on the token that precedes the first statement.
+  *
+  * @param AFPData &$result Overwritten by each statement in turn, so it ends
+  *   up holding the value of the last one evaluated.
+  */
+ protected function doLevelSemicolon( &$result ) {
+ 	while ( true ) {
+ 		$this->move();
+ 		if ( $this->mCur->type == AFPToken::TSTATEMENTSEPARATOR ) {
+ 			// Empty statement: go straight on to the next one.
+ 			continue;
+ 		}
+
+ 		$this->doLevelSet( $result );
+ 		if ( $this->mCur->type != AFPToken::TSTATEMENTSEPARATOR ) {
+ 			break;
+ 		}
+ 	}
+ }
+
+ /**
+  * Handles assignments: "name := value", "name[index] := value" and
+  * "name[] := value" (append). Anything that turns out not to be an
+  * assignment is rewound and handed to doLevelConditions().
+  *
+  * @param AFPData &$result Receives the assigned value, or the value of the
+  *   expression when no assignment is present.
+  * @throws AFPUserVisibleException 'unrecognisedvar', 'notlist',
+  *   'expectednotfound' or 'outofbounds' for malformed indexed assignments.
+  */
+ protected function doLevelSet( &$result ) {
+ 	if ( $this->mCur->type == AFPToken::TID ) {
+ 		$varname = $this->mCur->value;
+ 		// Remember where we are so that non-assignments can be re-parsed.
+ 		$prev = $this->getState();
+ 		$this->move();
+
+ 		if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ 			$this->move();
+ 			$this->doLevelSet( $result );
+ 			$this->setUserVariable( $varname, $result );
+
+ 			return;
+ 		} elseif ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ 			if ( !$this->mVars->varIsSet( $varname ) ) {
+ 				throw new AFPUserVisibleException( 'unrecognisedvar',
+ 					$this->mCur->pos,
+ 					[ $varname ]
+ 				);
+ 			}
+ 			$list = $this->mVars->getVar( $varname );
+ 			if ( $list->type != AFPData::DLIST ) {
+ 				throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] );
+ 			}
+ 			$list = $list->toList();
+ 			$this->move();
+ 			if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ 				// "name[]": append a new element.
+ 				$idx = 'new';
+ 			} else {
+ 				// Step back onto the '[' because doLevelSemicolon() starts by
+ 				// advancing one token.
+ 				$this->setState( $prev );
+ 				$this->move();
+ 				$idx = new AFPData();
+ 				$this->doLevelSemicolon( $idx );
+ 				$idx = $idx->toInt();
+ 				if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) {
+ 					throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
+ 						[ ']', $this->mCur->type, $this->mCur->value ] );
+ 				}
+ 				if ( count( $list ) <= $idx ) {
+ 					// Report the size of the list being indexed; $result is
+ 					// unrelated to it at this point.
+ 					throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+ 						[ $idx, count( $list ) ] );
+ 				}
+ 			}
+ 			$this->move();
+ 			if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ 				$this->move();
+ 				$this->doLevelSet( $result );
+ 				if ( $idx === 'new' ) {
+ 					$list[] = $result;
+ 				} else {
+ 					$list[$idx] = $result;
+ 				}
+ 				$this->setUserVariable( $varname, new AFPData( AFPData::DLIST, $list ) );
+
+ 				return;
+ 			} else {
+ 				// Plain list access, not an assignment: re-parse as an expression.
+ 				$this->setState( $prev );
+ 			}
+ 		} else {
+ 			$this->setState( $prev );
+ 		}
+ 	}
+ 	$this->doLevelConditions( $result );
+ }
+
+ /**
+ * Handles conditionals: "if A then B else C end" and "A ? B : C".
+ * Both branches are always parsed; while the branch that was not selected
+ * is parsed, mShortCircuit is set to mAllowShort and restored afterwards.
+ *
+ * @param AFPData &$result Receives the value of the selected branch, or of
+ * the plain expression when no conditional is present.
+ * @throws AFPUserVisibleException 'expectednotfound' when 'then', 'else',
+ * 'end' or ':' is missing.
+ */
+ protected function doLevelConditions( &$result ) {
+ if ( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'if' ) {
+ $this->move();
+ $this->doLevelBoolOps( $result );
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'then' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'then',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ $r1 = new AFPData();
+ $r2 = new AFPData();
+
+ $isTrue = $result->toBool();
+
+ // "then" branch: short-circuit it when the condition is false.
+ if ( !$isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r1 );
+ if ( !$isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'else' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'else',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ // "else" branch: short-circuit it when the condition is true.
+ if ( $isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r2 );
+ if ( $isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'end' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'end',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ // $result still holds the condition here.
+ if ( $result->toBool() ) {
+ $result = $r1;
+ } else {
+ $result = $r2;
+ }
+ } else {
+ $this->doLevelBoolOps( $result );
+ // Ternary form: condition ? valueIfTrue : valueIfFalse
+ if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '?' ) {
+ $this->move();
+ $r1 = new AFPData();
+ $r2 = new AFPData();
+
+ $isTrue = $result->toBool();
+
+ if ( !$isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r1 );
+ if ( !$isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ ':',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ if ( $isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r2 );
+ if ( $isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( $isTrue ) {
+ $result = $r1;
+ } else {
+ $result = $r2;
+ }
+ }
+ }
+ }
+
+ /**
+  * Handles the boolean operators '&', '|' and '^', left to right.
+  *
+  * When the left side already decides an '&' or '|', the right side is still
+  * parsed, with mShortCircuit set to mAllowShort, and the result becomes the
+  * left side converted to a boolean.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelBoolOps( &$result ) {
+ 	$this->doLevelCompares( $result );
+ 	$boolOps = [ '&', '|', '^' ];
+ 	while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $boolOps ) ) {
+ 		$op = $this->mCur->value;
+ 		$this->move();
+ 		$rhs = new AFPData();
+
+ 		// We can go on quickly as either one statement with | is true or one with & is false
+ 		$decided = ( $op == '&' && !$result->toBool() ) || ( $op == '|' && $result->toBool() );
+ 		if ( !$decided ) {
+ 			$this->doLevelCompares( $rhs );
+ 			$result = AFPData::boolOp( $result, $rhs, $op );
+ 			continue;
+ 		}
+
+ 		$savedShortCircuit = $this->mShortCircuit;
+ 		$this->mShortCircuit = $this->mAllowShort;
+ 		$this->doLevelCompares( $rhs );
+ 		$this->mShortCircuit = $savedShortCircuit;
+ 		$result = new AFPData( AFPData::DBOOL, $result->toBool() );
+ 	}
+ }
+
+ /**
+  * Handles the comparison operators ==, ===, !=, !==, <, >, <=, >= and =,
+  * left to right. While short-circuiting, the right operand is parsed but no
+  * comparison is performed.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelCompares( &$result ) {
+ 	$this->doLevelSumRels( $result );
+ 	$comparators = [ '==', '===', '!=', '!==', '<', '>', '<=', '>=', '=' ];
+ 	while ( true ) {
+ 		if ( $this->mCur->type != AFPToken::TOP || !in_array( $this->mCur->value, $comparators ) ) {
+ 			break;
+ 		}
+ 		$op = $this->mCur->value;
+ 		$this->move();
+ 		$rhs = new AFPData();
+ 		$this->doLevelSumRels( $rhs );
+ 		if ( $this->mShortCircuit ) {
+ 			// The result doesn't matter.
+ 			break;
+ 		}
+ 		AbuseFilter::triggerLimiter();
+ 		$result = AFPData::compareOp( $result, $rhs, $op );
+ 	}
+ }
+
+ /**
+  * Handles binary '+' and '-', left to right. While short-circuiting, the
+  * right operand is parsed but nothing is computed.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelSumRels( &$result ) {
+ 	$this->doLevelMulRels( $result );
+ 	$additive = [ '+', '-' ];
+ 	while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $additive ) ) {
+ 		$op = $this->mCur->value;
+ 		$this->move();
+
+ 		$rhs = new AFPData();
+ 		$this->doLevelMulRels( $rhs );
+
+ 		if ( $this->mShortCircuit ) {
+ 			// The result doesn't matter.
+ 			break;
+ 		}
+
+ 		if ( $op == '+' ) {
+ 			$result = AFPData::sum( $result, $rhs );
+ 		}
+ 		if ( $op == '-' ) {
+ 			$result = AFPData::sub( $result, $rhs );
+ 		}
+ 	}
+ }
+
+ /**
+  * Handles '*', '/' and '%', left to right. While short-circuiting, the
+  * right operand is parsed but nothing is computed.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelMulRels( &$result ) {
+ 	$this->doLevelPow( $result );
+ 	$multiplicative = [ '*', '/', '%' ];
+ 	while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $multiplicative ) ) {
+ 		$op = $this->mCur->value;
+ 		$this->move();
+
+ 		$rhs = new AFPData();
+ 		$this->doLevelPow( $rhs );
+
+ 		if ( $this->mShortCircuit ) {
+ 			// The result doesn't matter.
+ 			break;
+ 		}
+
+ 		// The position of the token following the right operand is passed along.
+ 		$position = $this->mCur->pos;
+ 		$result = AFPData::mulRel( $result, $rhs, $op, $position );
+ 	}
+ }
+
+ /**
+  * Handles the exponentiation operator '**', left to right. While
+  * short-circuiting, the exponent is parsed but nothing is computed.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelPow( &$result ) {
+ 	$this->doLevelBoolInvert( $result );
+ 	while ( true ) {
+ 		$atPowOperator = $this->mCur->type == AFPToken::TOP && $this->mCur->value == '**';
+ 		if ( !$atPowOperator ) {
+ 			break;
+ 		}
+ 		$this->move();
+
+ 		$exponent = new AFPData();
+ 		$this->doLevelBoolInvert( $exponent );
+
+ 		if ( $this->mShortCircuit ) {
+ 			// The result doesn't matter.
+ 			break;
+ 		}
+ 		$result = AFPData::pow( $result, $exponent );
+ 	}
+ }
+
+ /**
+  * Handles the prefix negation operator '!'. While short-circuiting, the
+  * operand is parsed but not negated.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelBoolInvert( &$result ) {
+ 	$negated = $this->mCur->type == AFPToken::TOP && $this->mCur->value == '!';
+ 	if ( $negated ) {
+ 		$this->move();
+ 	}
+
+ 	$this->doLevelSpecialWords( $result );
+
+ 	// When short-circuiting, the result doesn't matter.
+ 	if ( $negated && !$this->mShortCircuit ) {
+ 		$result = AFPData::boolInvert( $result );
+ 	}
+ }
+
+ /**
+  * Handles the keyword operators listed in self::$mKeywords (in, like,
+  * contains, rlike, ...). The keyword is matched case-insensitively and is
+  * dispatched to the AFPData static method it maps to.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelSpecialWords( &$result ) {
+ 	$this->doLevelUnarys( $result );
+
+ 	$keyword = strtolower( $this->mCur->value );
+ 	if ( $this->mCur->type != AFPToken::TKEYWORD || !isset( self::$mKeywords[$keyword] ) ) {
+ 		return;
+ 	}
+
+ 	$handler = self::$mKeywords[$keyword];
+ 	$this->move();
+ 	$rhs = new AFPData();
+ 	$this->doLevelUnarys( $rhs );
+
+ 	if ( $this->mShortCircuit ) {
+ 		// The result doesn't matter.
+ 		return;
+ 	}
+
+ 	AbuseFilter::triggerLimiter();
+
+ 	$result = AFPData::$handler( $result, $rhs, $this->mCur->pos );
+ }
+
+ /**
+  * Handles the unary sign operators '+' and '-'. Unary '+' leaves the operand
+  * unchanged; while short-circuiting, the operand is parsed but not negated.
+  *
+  * @param AFPData &$result
+  */
+ protected function doLevelUnarys( &$result ) {
+ 	$op = $this->mCur->value;
+ 	$hasSign = $this->mCur->type == AFPToken::TOP && ( $op == "+" || $op == "-" );
+ 	if ( $hasSign ) {
+ 		$this->move();
+ 	}
+
+ 	$this->doLevelListElements( $result );
+
+ 	if ( !$hasSign || $this->mShortCircuit ) {
+ 		// Nothing to apply, or the result doesn't matter.
+ 		return;
+ 	}
+ 	if ( $op == '-' ) {
+ 		$result = AFPData::unaryMinus( $result );
+ 	}
+ }
+
+ /**
+  * Handles list element access, e.g. "list[2]", including chained accesses.
+  *
+  * @param AFPData &$result Holds the list on entry to each access and the
+  *   selected element afterwards.
+  * @throws AFPUserVisibleException 'expectednotfound' when the closing ']' is
+  *   missing, 'notlist' when the indexed value is not a list, 'outofbounds'
+  *   when the index is not smaller than the list length.
+  */
+ protected function doLevelListElements( &$result ) {
+ 	$this->doLevelBraces( $result );
+ 	while ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ 		$idx = new AFPData();
+ 		$this->doLevelSemicolon( $idx );
+
+ 		if ( $this->mCur->type != AFPToken::TSQUAREBRACKET || $this->mCur->value != ']' ) {
+ 			throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
+ 				[ ']', $this->mCur->type, $this->mCur->value ] );
+ 		}
+
+ 		$idx = $idx->toInt();
+ 		if ( $result->type != AFPData::DLIST ) {
+ 			throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] );
+ 		}
+
+ 		$length = count( $result->data );
+ 		if ( $length <= $idx ) {
+ 			throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+ 				[ $idx, $length ] );
+ 		}
+
+ 		$result = $result->data[$idx];
+ 		$this->move();
+ 	}
+ }
+
+ /**
+  * Handles parenthesised sub-expressions. While short-circuiting, the whole
+  * parenthesised group is skipped instead of being evaluated.
+  *
+  * @param AFPData &$result
+  * @throws AFPUserVisibleException 'expectednotfound' when the closing ')' is
+  *   missing.
+  */
+ protected function doLevelBraces( &$result ) {
+ 	$atOpeningBrace = $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(';
+ 	if ( !$atOpeningBrace ) {
+ 		$this->doLevelFunction( $result );
+ 		return;
+ 	}
+
+ 	if ( $this->mShortCircuit ) {
+ 		$this->skipOverBraces();
+ 	} else {
+ 		$this->doLevelSemicolon( $result );
+ 	}
+
+ 	$atClosingBrace = $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')';
+ 	if ( !$atClosingBrace ) {
+ 		throw new AFPUserVisibleException(
+ 			'expectednotfound',
+ 			$this->mCur->pos,
+ 			[ ')', $this->mCur->type, $this->mCur->value ]
+ 		);
+ 	}
+ 	$this->move();
+ }
+
+ /**
+  * Handles calls to the functions listed in self::$mFunctions. Results are
+  * memoised in self::$funcCache, keyed by method name and serialized
+  * arguments; methods listed in self::$ActiveFunctions are always re-run.
+  * While short-circuiting, the argument list is skipped and nothing is called.
+  *
+  * @param AFPData &$result
+  * @throws AFPUserVisibleException 'expectednotfound' when '(' or ')' is
+  *   missing around the argument list.
+  */
+ protected function doLevelFunction( &$result ) {
+ 	if ( $this->mCur->type != AFPToken::TID || !isset( self::$mFunctions[$this->mCur->value] ) ) {
+ 		$this->doLevelAtom( $result );
+ 		return;
+ 	}
+
+ 	$func = self::$mFunctions[$this->mCur->value];
+ 	$this->move();
+ 	if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) ) {
+ 		throw new AFPUserVisibleException( 'expectednotfound',
+ 			$this->mCur->pos,
+ 			[
+ 				'(',
+ 				$this->mCur->type,
+ 				$this->mCur->value
+ 			]
+ 		);
+ 	}
+
+ 	if ( $this->mShortCircuit ) {
+ 		$this->skipOverBraces();
+ 		$this->move();
+
+ 		// The result doesn't matter.
+ 		return;
+ 	}
+
+ 	// Collect the comma-separated arguments; at least one entry is always added.
+ 	$args = [];
+ 	do {
+ 		$arg = new AFPData();
+ 		$this->doLevelSemicolon( $arg );
+ 		$args[] = $arg;
+ 	} while ( $this->mCur->type == AFPToken::TCOMMA );
+
+ 	if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) {
+ 		throw new AFPUserVisibleException( 'expectednotfound',
+ 			$this->mCur->pos,
+ 			[
+ 				')',
+ 				$this->mCur->type,
+ 				$this->mCur->value
+ 			]
+ 		);
+ 	}
+ 	$this->move();
+
+ 	$funcHash = md5( $func . serialize( $args ) );
+
+ 	$mayUseCache = !in_array( $func, self::$ActiveFunctions );
+ 	if ( $mayUseCache && isset( self::$funcCache[$funcHash] ) ) {
+ 		$result = self::$funcCache[$funcHash];
+ 	} else {
+ 		AbuseFilter::triggerLimiter();
+ 		$result = self::$funcCache[$funcHash] = $this->$func( $args );
+ 	}
+
+ 	// Keep the cache bounded.
+ 	if ( count( self::$funcCache ) > 1000 ) {
+ 		self::$funcCache = [];
+ 	}
+ }
+
+ /**
+ * Handles atoms: variables, string/float/int literals, the keywords
+ * true/false/null and list definitions. Advances past the atom on success.
+ * Nothing is returned; the value is written to $result.
+ *
+ * @param AFPData &$result Receives the value of the atom. Left untouched for
+ * variables while short-circuiting, at the end of input and on ')'.
+ * @throws AFPUserVisibleException 'unrecognisedkeyword', 'expectednotfound'
+ * (malformed list) or 'unexpectedtoken'.
+ */
+ protected function doLevelAtom( &$result ) {
+ $tok = $this->mCur->value;
+ switch ( $this->mCur->type ) {
+ case AFPToken::TID:
+ // Variables are not looked up while short-circuiting.
+ if ( $this->mShortCircuit ) {
+ break;
+ }
+ $var = strtolower( $tok );
+ $result = $this->getVarValue( $var );
+ break;
+ case AFPToken::TSTRING:
+ $result = new AFPData( AFPData::DSTRING, $tok );
+ break;
+ case AFPToken::TFLOAT:
+ $result = new AFPData( AFPData::DFLOAT, $tok );
+ break;
+ case AFPToken::TINT:
+ $result = new AFPData( AFPData::DINT, $tok );
+ break;
+ case AFPToken::TKEYWORD:
+ if ( $tok == "true" ) {
+ $result = new AFPData( AFPData::DBOOL, true );
+ } elseif ( $tok == "false" ) {
+ $result = new AFPData( AFPData::DBOOL, false );
+ } elseif ( $tok == "null" ) {
+ $result = new AFPData();
+ } else {
+ throw new AFPUserVisibleException(
+ 'unrecognisedkeyword',
+ $this->mCur->pos,
+ [ $tok ]
+ );
+ }
+ break;
+ case AFPToken::TNONE:
+ return; // Handled at entry level
+ case AFPToken::TBRACE:
+ if ( $this->mCur->value == ')' ) {
+ return; // Handled at the entry level
+ }
+ // Any other brace falls through and ends up in the default case.
+ case AFPToken::TSQUAREBRACKET:
+ if ( $this->mCur->value == '[' ) {
+ $list = [];
+ while ( true ) {
+ $this->move();
+ // A ']' right after '[' or after a comma ends the list.
+ if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ break;
+ }
+ $item = new AFPData();
+ $this->doLevelSet( $item );
+ $list[] = $item;
+ if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ break;
+ }
+ if ( $this->mCur->type != AFPToken::TCOMMA ) {
+ throw new AFPUserVisibleException(
+ 'expectednotfound',
+ $this->mCur->pos,
+ [ ', or ]', $this->mCur->type, $this->mCur->value ]
+ );
+ }
+ }
+ $result = new AFPData( AFPData::DLIST, $list );
+ break;
+ }
+ // Not a list opening: fall through to the error below.
+ default:
+ throw new AFPUserVisibleException(
+ 'unexpectedtoken',
+ $this->mCur->pos,
+ [
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+ }
+
+ /* End of levels */
+
+ /**
+  * Look up the value of a variable by its lower-cased name.
+  *
+  * @param string $var
+  * @return AFPData
+  * @throws AFPUserVisibleException 'unrecognisedvar' when the name is neither
+  *   listed in the builder values nor set on the variable holder.
+  */
+ protected function getVarValue( $var ) {
+ 	$var = strtolower( $var );
+ 	$builderValues = AbuseFilter::getBuilderValues();
+
+ 	$isBuiltin = array_key_exists( $var, $builderValues['vars'] );
+ 	if ( !$isBuiltin && !$this->mVars->varIsSet( $var ) ) {
+ 		// If the variable is invalid, throw an exception
+ 		throw new AFPUserVisibleException(
+ 			'unrecognisedvar',
+ 			$this->mCur->pos,
+ 			[ $var ]
+ 		);
+ 	}
+
+ 	return $this->mVars->getVar( $var );
+ }
+
+ /**
+  * Set a user-defined variable, refusing to overwrite a built-in one.
+  *
+  * @param string $name
+  * @param mixed $value Value handed to the variable holder's setVar()
+  * @throws AFPUserVisibleException 'overridebuiltin' when $name is listed in
+  *   the builder values.
+  */
+ protected function setUserVariable( $name, $value ) {
+ 	$builtinVars = AbuseFilter::getBuilderValues()['vars'];
+ 	$isBuiltin = array_key_exists( $name, $builtinVars );
+ 	if ( $isBuiltin ) {
+ 		throw new AFPUserVisibleException( 'overridebuiltin', $this->mCur->pos, [ $name ] );
+ 	}
+
+ 	$this->mVars->setVar( $name, $value );
+ }
+
+ // Built-in functions
+
+ /**
+  * Filter function: lower-case the first argument using the content language.
+  *
+  * @param array $args Evaluated arguments; only the first one is used
+  * @return AFPData String value
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcLc( $args ) {
+ 	global $wgContLang;
+ 	if ( count( $args ) < 1 ) {
+ 		// Exactly one argument is required, so report 1 as the expected
+ 		// count (this used to say 2).
+ 		throw new AFPUserVisibleException(
+ 			'notenoughargs',
+ 			$this->mCur->pos,
+ 			[ 'lc', 1, count( $args ) ]
+ 		);
+ 	}
+ 	$s = $args[0]->toString();
+
+ 	return new AFPData( AFPData::DSTRING, $wgContLang->lc( $s ) );
+ }
+
+ /**
+  * Filter function: upper-case the first argument using the content language.
+  *
+  * @param array $args Evaluated arguments; only the first one is used
+  * @return AFPData String value
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcUc( $args ) {
+ 	global $wgContLang;
+ 	if ( count( $args ) < 1 ) {
+ 		// Exactly one argument is required, so report 1 as the expected
+ 		// count (this used to say 2).
+ 		throw new AFPUserVisibleException(
+ 			'notenoughargs',
+ 			$this->mCur->pos,
+ 			[ 'uc', 1, count( $args ) ]
+ 		);
+ 	}
+ 	$s = $args[0]->toString();
+
+ 	return new AFPData( AFPData::DSTRING, $wgContLang->uc( $s ) );
+ }
+
+ /**
+  * Filter function: length of the first argument. For a list this is the
+  * number of elements; for anything else it is the number of UTF-8
+  * characters in its string form.
+  *
+  * @param array $args Evaluated arguments; only the first one is used
+  * @return AFPData Integer value
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcLen( $args ) {
+ 	if ( count( $args ) < 1 ) {
+ 		// Exactly one argument is required, so report 1 as the expected
+ 		// count (this used to say 2).
+ 		throw new AFPUserVisibleException(
+ 			'notenoughargs',
+ 			$this->mCur->pos,
+ 			[ 'len', 1, count( $args ) ]
+ 		);
+ 	}
+ 	if ( $args[0]->type == AFPData::DLIST ) {
+ 		// Don't use toString on lists, but count
+ 		return new AFPData( AFPData::DINT, count( $args[0]->data ) );
+ 	}
+ 	$s = $args[0]->toString();
+
+ 	return new AFPData( AFPData::DINT, mb_strlen( $s, 'utf-8' ) );
+ }
+
+ /**
+  * Simple normalisation: drop every run of digits and non-word characters
+  * from the string form of the first argument, then lower-case the rest
+  * with strtolower().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String holding the normalised text
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcSimpleNorm( $args ) {
+     if ( count( $args ) < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             // The expected count must agree with the guard above: one argument.
+             [ 'simplenorm', 1, count( $args ) ]
+         );
+     }
+     $s = $args[0]->toString();
+
+     $s = preg_replace( '/[\d\W]+/', '', $s );
+     $s = strtolower( $s );
+
+     return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+  * Share of characters in the first argument that rmspecials() removes.
+  *
+  * The result is 1 minus (length after rmspecials() / original length),
+  * both measured with mb_strlen(). An empty string yields 0.
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData Float ratio
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcSpecialRatio( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'specialratio', 1, $argc ]
+         );
+     }
+
+     $text = $args[0]->toString();
+     if ( strlen( $text ) === 0 ) {
+         // Nothing to measure, and this also avoids dividing by zero below.
+         return new AFPData( AFPData::DFLOAT, 0 );
+     }
+
+     $stripped = $this->rmspecials( $text );
+     $keptShare = mb_strlen( $stripped ) / mb_strlen( $text );
+
+     return new AFPData( AFPData::DFLOAT, 1. - $keptShare );
+ }
+
+ /**
+  * Count things, depending on how many arguments are given.
+  *
+  * With one argument: the number of elements when it is a list, otherwise
+  * the number of pieces its string form splits into on ','.
+  * With more arguments: the number of times $args[0] (needle) occurs in
+  * $args[1] (haystack) according to substr_count(); an empty needle counts 0.
+  *
+  * @param array $args Function arguments
+  * @return AFPData Integer count
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcCount( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'count', 1, $argc ]
+         );
+     }
+
+     if ( $argc === 1 ) {
+         $only = $args[0];
+         $total = $only->type == AFPData::DLIST
+             ? count( $only->data )
+             : count( explode( ',', $only->toString() ) );
+
+         return new AFPData( AFPData::DINT, $total );
+     }
+
+     $needle = $args[0]->toString();
+     $haystack = $args[1]->toString();
+
+     // T62203: Keep empty parameters from causing PHP warnings
+     $total = $needle === '' ? 0 : substr_count( $haystack, $needle );
+
+     return new AFPData( AFPData::DINT, $total );
+ }
+
+ /**
+  * Count regex matches.
+  *
+  * With one argument: the number of pieces its string form splits into on ','.
+  * With more arguments: the number of matches of the regex $args[0] (needle)
+  * in $args[1] (haystack), as reported by preg_match_all().
+  *
+  * @param array $args Function arguments
+  * @return AFPData Integer count
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given,
+  *   'regexfailure' when preg_match_all() returns false
+  * @throws Exception
+  */
+ protected function funcRCount( $args ) {
+     if ( count( $args ) < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'rcount', 1, count( $args ) ]
+         );
+     }
+
+     if ( count( $args ) == 1 ) {
+         $count = count( explode( ',', $args[0]->toString() ) );
+     } else {
+         $needle = $args[0]->toString();
+         $haystack = $args[1]->toString();
+
+         # Munge the regex: escape slashes, then wrap it in '/' delimiters
+         $needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+         $needle = "/$needle/u";
+
+         // The pattern is user input: a broken one must surface only as the
+         // exception below, not additionally as a PHP warning. This mirrors
+         // the suppress/restore pair around preg_match() in funcGetMatches().
+         Wikimedia\suppressWarnings();
+         // Omit the '$matches' argument to avoid computing them, just count.
+         $count = preg_match_all( $needle, $haystack );
+         Wikimedia\restoreWarnings();
+
+         if ( $count === false ) {
+             throw new AFPUserVisibleException(
+                 'regexfailure',
+                 $this->mCur->pos,
+                 [ 'unspecified error in preg_match_all()', $needle ]
+             );
+         }
+     }
+
+     return new AFPData( AFPData::DINT, $count );
+ }
+
+ /**
+ * Returns an array of matches of needle in the haystack, the first one for the whole regex,
+ * the other ones for every capturing group.
+ *
+ * The preg_match() result is padded with false for every counted group that has no entry,
+ * so groups which did not take part in the match are still present in the list.
+ *
+ * @param array $args $args[0] is the regex (needle), $args[1] the text to search (haystack)
+ * @return AFPData A list of matches.
+ * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments,
+ *   'regexfailure' when preg_match() returns false
+ */
+ protected function funcGetMatches( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'get_matches', 2, count( $args ) ]
+ );
+ }
+ $needle = $args[0]->toString();
+ $haystack = $args[1]->toString();
+
+ // Count the amount of capturing groups in the submitted pattern.
+ // This way we can return a fixed-dimension array, much easier to manage.
+ // First, strip away escaped parentheses
+ $sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle );
+ // Then strip starting parentheses of non-capturing groups
+ // (also atomics, lookahead and so on, even if not every of them is supported)
+ // NOTE(review): this drops every "(?", so a named group "(?<name>...)" is not
+ // counted either - confirm that is intended.
+ $sanitized = preg_replace( '/\(\?/', '', $sanitized );
+ // Finally create an array of falses with dimension = # of capturing groups
+ // (+ 1 for index 0, which holds the match of the whole pattern)
+ $groupscount = substr_count( $sanitized, '(' ) + 1;
+ $falsy = array_fill( 0, $groupscount, false );
+
+ // Munge the regex by escaping slashes, then wrap it in '/' delimiters
+ // with the 'u' modifier
+ $needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+ $needle = "/$needle/u";
+
+ // Suppress and restore are here for the same reason as T177744
+ Wikimedia\suppressWarnings();
+ $check = preg_match( $needle, $haystack, $matches );
+ Wikimedia\restoreWarnings();
+
+ if ( $check === false ) {
+ throw new AFPUserVisibleException(
+ 'regexfailure',
+ $this->mCur->pos,
+ [ 'unspecified error in preg_match()', $needle ]
+ );
+ }
+
+ // Returned array has non-empty positions identical to the ones returned
+ // by the third parameter of a standard preg_match call ($matches in this case).
+ // We want a union with falsy to return a fixed-dimension array: the array "+"
+ // operator keeps every key of $matches and only adds the keys it lacks.
+ return AFPData::newFromPHPVar( $matches + $falsy );
+ }
+
+ /**
+  * Tell whether an IP address lies inside a range, as decided by IP::isInRange().
+  *
+  * @param array $args $args[0] is the address, $args[1] the range; both are
+  *   used in their string form
+  * @return AFPData Boolean result of IP::isInRange()
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcIPInRange( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'ip_in_range', 2, $argc ]
+         );
+     }
+
+     $address = $args[0]->toString();
+     $rangeSpec = $args[1]->toString();
+
+     return new AFPData( AFPData::DBOOL, IP::isInRange( $address, $rangeSpec ) );
+ }
+
+ /**
+  * Decode HTML entities in the first argument, then run it through ccnorm().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String holding the normalised text
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcCCNorm( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'ccnorm', 1, $argc ]
+         );
+     }
+
+     $decoded = html_entity_decode( $args[0]->toString(), ENT_QUOTES, 'UTF-8' );
+
+     return new AFPData( AFPData::DSTRING, $this->ccnorm( $decoded ) );
+ }
+
+ /**
+  * Search a string for multiple substrings in OR mode, without normalisation.
+  *
+  * @param array $args The first element is the string to search, the rest are
+  *   the substrings to look for
+  * @return AFPData Boolean result of contains()
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcContainsAny( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'contains_any', 2, $argc ]
+         );
+     }
+
+     // Detach the haystack; what remains in $args are the needles.
+     $haystack = array_shift( $args );
+     $found = self::contains( $haystack, $args, true );
+
+     return new AFPData( AFPData::DBOOL, $found );
+ }
+
+ /**
+  * Search a string for multiple substrings in AND mode, without normalisation.
+  *
+  * @param array $args The first element is the string to search, the rest are
+  *   the substrings to look for
+  * @return AFPData Boolean result of contains()
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcContainsAll( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'contains_all', 2, $argc ]
+         );
+     }
+
+     // Detach the haystack; what remains in $args are the needles.
+     $haystack = array_shift( $args );
+     $found = self::contains( $haystack, $args, false, false );
+
+     return new AFPData( AFPData::DBOOL, $found );
+ }
+
+ /**
+  * Normalize and search a string for multiple substrings in OR mode.
+  *
+  * @param array $args The first element is the string to search, the rest are
+  *   the substrings to look for
+  * @return AFPData Boolean result of contains()
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcCCNormContainsAny( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'ccnorm_contains_any', 2, $argc ]
+         );
+     }
+
+     // Detach the haystack; what remains in $args are the needles.
+     $haystack = array_shift( $args );
+     $found = self::contains( $haystack, $args, true, true );
+
+     return new AFPData( AFPData::DBOOL, $found );
+ }
+
+ /**
+  * Normalize and search a string for multiple substrings in AND mode.
+  *
+  * @param array $args The first element is the string to search, the rest are
+  *   the substrings to look for
+  * @return AFPData Boolean result of contains()
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcCCNormContainsAll( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'ccnorm_contains_all', 2, $argc ]
+         );
+     }
+
+     // Detach the haystack; what remains in $args are the needles.
+     $haystack = array_shift( $args );
+     $found = self::contains( $haystack, $args, false, true );
+
+     return new AFPData( AFPData::DBOOL, $found );
+ }
+
+ /**
+  * Search for substrings in a string.
+  *
+  * $is_any selects the logic: true means OR (one hit is enough), false means
+  * AND (every substring has to be present).
+  *
+  * With $normalize = true both the searched string and every substring are
+  * passed through ccnorm() before comparing.
+  *
+  * An empty searched string always gives false. Substrings that are empty
+  * (after optional normalisation) are skipped.
+  *
+  * @param AFPData $string String to search in
+  * @param AFPData[] $values Substrings to look for
+  * @param bool $is_any
+  * @param bool $normalize
+  *
+  * @return bool
+  */
+ protected static function contains( $string, $values, $is_any = true, $normalize = false ) {
+     $haystack = $string->toString();
+     if ( $haystack == '' ) {
+         return false;
+     }
+     if ( $normalize ) {
+         $haystack = self::ccnorm( $haystack );
+     }
+
+     foreach ( $values as $value ) {
+         $needle = $value->toString();
+         if ( $normalize ) {
+             $needle = self::ccnorm( $needle );
+         }
+
+         // T62203: Keep empty parameters from causing PHP warnings
+         if ( $needle === '' ) {
+             continue;
+         }
+
+         $found = strpos( $haystack, $needle ) !== false;
+         if ( $found !== $is_any ) {
+             // OR mode with a miss, or AND mode with a hit: undecided, keep going.
+             continue;
+         }
+
+         // OR mode with a hit decides "true"; AND mode with a miss decides "false".
+         return $found;
+     }
+
+     // Loop exhausted: in OR mode nothing was found (false), in AND mode
+     // nothing was missing (true).
+     return ! $is_any;
+ }
+
+ /**
+  * Normalise a string with the shared Equivset instance.
+  *
+  * @param string $s
+  * @return mixed Whatever Equivset::normalize() returns
+  */
+ protected static function ccnorm( $s ) {
+     // Keep a single Equivset in self::$equivset so that its data is not
+     // loaded more than once.
+     $set = self::$equivset;
+     if ( !$set ) {
+         $set = new Equivset();
+         self::$equivset = $set;
+     }
+
+     return $set->normalize( $s );
+ }
+
+ /**
+  * Remove every character that is neither a Unicode letter nor a Unicode number.
+  *
+  * @param string $s
+  * @return array|string
+  */
+ protected function rmspecials( $s ) {
+     $notLetterOrNumber = '/[^\p{L}\p{N}]/u';
+
+     return preg_replace( $notLetterOrNumber, '', $s );
+ }
+
+ /**
+  * Collapse every run of one repeated character into a single occurrence.
+  *
+  * @param string $s
+  * @return array|string
+  */
+ protected function rmdoubles( $s ) {
+     $repeatedChar = '/(.)\1+/us';
+
+     return preg_replace( $repeatedChar, '\1', $s );
+ }
+
+ /**
+  * Remove all whitespace (everything the regex class \s matches).
+  *
+  * @param string $s
+  * @return array|string
+  */
+ protected function rmwhitespace( $s ) {
+     $whitespaceRun = '/\s+/u';
+
+     return preg_replace( $whitespaceRun, '', $s );
+ }
+
+ /**
+  * Filter function wrapper around rmspecials().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String with the special characters removed
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcRMSpecials( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'rmspecials', 1, $argc ]
+         );
+     }
+
+     $cleaned = $this->rmspecials( $args[0]->toString() );
+
+     return new AFPData( AFPData::DSTRING, $cleaned );
+ }
+
+ /**
+  * Filter function wrapper around rmwhitespace().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String with the whitespace removed
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcRMWhitespace( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'rmwhitespace', 1, $argc ]
+         );
+     }
+
+     $cleaned = $this->rmwhitespace( $args[0]->toString() );
+
+     return new AFPData( AFPData::DSTRING, $cleaned );
+ }
+
+ /**
+  * Filter function wrapper around rmdoubles().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String with repeated characters collapsed
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcRMDoubles( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'rmdoubles', 1, $argc ]
+         );
+     }
+
+     $cleaned = $this->rmdoubles( $args[0]->toString() );
+
+     return new AFPData( AFPData::DSTRING, $cleaned );
+ }
+
+ /**
+  * Full normalisation: ccnorm(), then rmdoubles(), then rmspecials(), then
+  * rmwhitespace(), applied in that order to the first argument.
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String holding the normalised text
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcNorm( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'norm', 1, $argc ]
+         );
+     }
+
+     // Innermost call runs first.
+     $normalized = $this->rmwhitespace(
+         $this->rmspecials(
+             $this->rmdoubles(
+                 $this->ccnorm( $args[0]->toString() )
+             )
+         )
+     );
+
+     return new AFPData( AFPData::DSTRING, $normalized );
+ }
+
+ /**
+  * Multibyte substring, delegating to mb_substr().
+  *
+  * @param array $args $args[0] is the string, $args[1] the start offset and
+  *   the optional $args[2] the length
+  * @return AFPData String holding the extracted part
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcSubstr( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'substr', 2, $argc ]
+         );
+     }
+
+     $text = $args[0]->toString();
+     $begin = $args[1]->toInt();
+
+     // Without a third argument, take everything from $begin onwards.
+     $piece = isset( $args[2] )
+         ? mb_substr( $text, $begin, $args[2]->toInt() )
+         : mb_substr( $text, $begin );
+
+     return new AFPData( AFPData::DSTRING, $piece );
+ }
+
+ /**
+  * Position of the first occurrence of a substring, delegating to mb_strpos().
+  *
+  * Returns -1 instead of false when nothing is found, when the needle is
+  * empty, or when the offset lies outside the haystack.
+  *
+  * @param array $args $args[0] is the haystack, $args[1] the needle and the
+  *   optional $args[2] the offset to start searching from
+  * @return AFPData Integer position, or -1
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments
+  */
+ protected function funcStrPos( $args ) {
+     if ( count( $args ) < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'strpos', 2, count( $args ) ]
+         );
+     }
+
+     $haystack = $args[0]->toString();
+     $needle = $args[1]->toString();
+
+     // T62203: Keep empty parameters from causing PHP warnings
+     if ( $needle === '' ) {
+         return new AFPData( AFPData::DINT, -1 );
+     }
+
+     // An omitted offset is the same as starting at 0.
+     $offset = isset( $args[2] ) ? $args[2]->toInt() : 0;
+
+     // mb_strpos() rejects an offset that is not contained in the haystack
+     // (a warning before PHP 8, a ValueError since). Nothing can be found
+     // there anyway, so answer "not found" without calling it.
+     if ( abs( $offset ) > mb_strlen( $haystack ) ) {
+         return new AFPData( AFPData::DINT, -1 );
+     }
+
+     $result = mb_strpos( $haystack, $needle, $offset );
+     if ( $result === false ) {
+         $result = -1;
+     }
+
+     return new AFPData( AFPData::DINT, $result );
+ }
+
+ /**
+  * Replace every occurrence of a substring, delegating to str_replace().
+  *
+  * @param array $args $args[0] is the subject, $args[1] the text to search
+  *   for and $args[2] its replacement; all are used in their string form
+  * @return AFPData String holding the result
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than three arguments
+  */
+ protected function funcStrReplace( $args ) {
+     $argc = count( $args );
+     if ( $argc < 3 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'str_replace', 3, $argc ]
+         );
+     }
+
+     $subject = $args[0]->toString();
+     $search = $args[1]->toString();
+     $replacement = $args[2]->toString();
+     $replaced = str_replace( $search, $replacement, $subject );
+
+     return new AFPData( AFPData::DSTRING, $replaced );
+ }
+
+ /**
+  * Escape regex metacharacters in the first argument with preg_quote().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData String holding the escaped text
+  * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+  */
+ protected function funcStrRegexEscape( $args ) {
+     $argc = count( $args );
+     if ( $argc < 1 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'rescape', 1, $argc ]
+         );
+     }
+
+     // preg_quote does not need the second parameter, since rlike takes
+     // care of the delimiter symbol itself
+     $escaped = preg_quote( $args[0]->toString() );
+
+     return new AFPData( AFPData::DSTRING, $escaped );
+ }
+
+ /**
+  * Assign a user variable from within a filter and hand the value back.
+  *
+  * @param array $args $args[0] supplies the variable name (string form),
+  *   $args[1] the value to store
+  * @return mixed The value that was stored, i.e. $args[1] unchanged
+  * @throws AFPUserVisibleException 'notenoughargs' with fewer than two arguments;
+  *   also whatever setUserVariable() throws
+  */
+ protected function funcSetVar( $args ) {
+     $argc = count( $args );
+     if ( $argc < 2 ) {
+         throw new AFPUserVisibleException(
+             'notenoughargs',
+             $this->mCur->pos,
+             [ 'set_var', 2, $argc ]
+         );
+     }
+
+     $assigned = $args[1];
+     $this->setUserVariable( $args[0]->toString(), $assigned );
+
+     return $assigned;
+ }
+
+ /**
+  * Cast the first argument to a string via AFPData::castTypes().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData
+  * @throws AFPUserVisibleException 'noparams' when no argument is given
+  */
+ protected function castString( $args ) {
+     if ( count( $args ) === 0 ) {
+         throw new AFPUserVisibleException(
+             'noparams',
+             $this->mCur->pos,
+             [ __METHOD__ ]
+         );
+     }
+
+     return AFPData::castTypes( $args[0], AFPData::DSTRING );
+ }
+
+ /**
+  * Cast the first argument to an integer via AFPData::castTypes().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData
+  * @throws AFPUserVisibleException 'noparams' when no argument is given
+  */
+ protected function castInt( $args ) {
+     if ( count( $args ) === 0 ) {
+         throw new AFPUserVisibleException(
+             'noparams',
+             $this->mCur->pos,
+             [ __METHOD__ ]
+         );
+     }
+
+     return AFPData::castTypes( $args[0], AFPData::DINT );
+ }
+
+ /**
+  * Cast the first argument to a float via AFPData::castTypes().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData
+  * @throws AFPUserVisibleException 'noparams' when no argument is given
+  */
+ protected function castFloat( $args ) {
+     if ( count( $args ) === 0 ) {
+         throw new AFPUserVisibleException(
+             'noparams',
+             $this->mCur->pos,
+             [ __METHOD__ ]
+         );
+     }
+
+     return AFPData::castTypes( $args[0], AFPData::DFLOAT );
+ }
+
+ /**
+  * Cast the first argument to a boolean via AFPData::castTypes().
+  *
+  * @param array $args Function arguments; only $args[0] is used
+  * @return AFPData
+  * @throws AFPUserVisibleException 'noparams' when no argument is given
+  */
+ protected function castBool( $args ) {
+     if ( count( $args ) === 0 ) {
+         throw new AFPUserVisibleException(
+             'noparams',
+             $this->mCur->pos,
+             [ __METHOD__ ]
+         );
+     }
+
+     return AFPData::castTypes( $args[0], AFPData::DBOOL );
+ }
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php
new file mode 100644
index 00000000..a97fccaf
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php
@@ -0,0 +1,258 @@
+<?php
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Tokenizer for AbuseFilter rules.
+ *
+ * Turns the source text of a filter into AFPToken objects. Results are kept
+ * in a local-server cache, keyed by CACHE_VERSION and a checksum of the code.
+ */
+class AbuseFilterTokenizer {
+    /**
+     * @var int Tokenizer cache version. Increment this when changing the syntax.
+     * Version 2: the digit 8 is no longer accepted in octal literals and the
+     * terminator of a comment is only looked for after its opening marker.
+     */
+    const CACHE_VERSION = 2;
+    const COMMENT_START_RE = '/\s*\/\*/A';
+    const ID_SYMBOL_RE = '/[0-9A-Za-z_]+/A';
+    const OPERATOR_RE =
+        '/(\!\=\=|\!\=|\!|\*\*|\*|\/|\+|\-|%|&|\||\^|\:\=|\?|\:|\<\=|\<|\>\=|\>|\=\=\=|\=\=|\=)/A';
+    const RADIX_RE = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([bxo])?/Au';
+    const WHITESPACE = "\011\012\013\014\015\040";
+
+    // Order is important. The punctuation-matching regex requires that
+    // ** comes before *, etc. They are sorted to make it easy to spot
+    // such errors.
+    public static $operators = [
+        '!==', '!=', '!', // Inequality
+        '**', '*', // Multiplication/exponentiation
+        '/', '+', '-', '%', // Other arithmetic
+        '&', '|', '^', // Logic
+        ':=', // Setting
+        '?', ':', // Ternary
+        '<=', '<', // Less than
+        '>=', '>', // Greater than
+        '===', '==', '=', // Equality
+    ];
+
+    // Single-character punctuation and the token type each one produces.
+    public static $punctuation = [
+        ',' => AFPToken::TCOMMA,
+        '(' => AFPToken::TBRACE,
+        ')' => AFPToken::TBRACE,
+        '[' => AFPToken::TSQUAREBRACKET,
+        ']' => AFPToken::TSQUAREBRACKET,
+        ';' => AFPToken::TSTATEMENTSEPARATOR,
+    ];
+
+    // Suffix character of a number literal => numeric base.
+    public static $bases = [
+        'b' => 2,
+        'x' => 16,
+        'o' => 8
+    ];
+
+    // Numeric base => regex that the digits of a literal in that base must match.
+    public static $baseCharsRe = [
+        2 => '/^[01]+$/',
+        // Octal digits run from 0 to 7 only.
+        8 => '/^[0-7]+$/',
+        16 => '/^[0-9A-Fa-f]+$/',
+        10 => '/^[0-9.]+$/',
+    ];
+
+    public static $keywords = [
+        'in', 'like', 'true', 'false', 'null', 'contains', 'matches',
+        'rlike', 'irlike', 'regex', 'if', 'then', 'else', 'end',
+    ];
+
+    /**
+     * Tokenize filter code, using the cache when possible.
+     *
+     * @param string $code
+     * @return array Map of token position => [ AFPToken, offset just after the token ]
+     * @throws AFPException
+     * @throws AFPUserVisibleException
+     */
+    public static function tokenize( $code ) {
+        static $tokenizerCache = null;
+        static $stats = null;
+
+        if ( !$tokenizerCache ) {
+            $tokenizerCache = ObjectCache::getLocalServerInstance( 'hash' );
+        }
+
+        if ( !$stats ) {
+            $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
+        }
+
+        // NOTE(review): crc32() is only 32 bits wide, so two different filters
+        // with the same checksum would share one cache entry.
+        $cacheKey = wfGlobalCacheKey( __CLASS__, self::CACHE_VERSION, crc32( $code ) );
+
+        $tokens = $tokenizerCache->get( $cacheKey );
+
+        if ( $tokens ) {
+            $stats->increment( 'AbuseFilter.tokenizerCache.hit' );
+            return $tokens;
+        }
+
+        $stats->increment( 'AbuseFilter.tokenizerCache.miss' );
+        $tokens = [];
+        $curPos = 0;
+
+        // Read tokens until nextToken() stops advancing the offset.
+        do {
+            $prevPos = $curPos;
+            $token = self::nextToken( $code, $curPos );
+            $tokens[ $token->pos ] = [ $token, $curPos ];
+        } while ( $curPos !== $prevPos );
+
+        // Keep the result for one day.
+        $tokenizerCache->set( $cacheKey, $tokens, 60 * 60 * 24 );
+
+        return $tokens;
+    }
+
+    /**
+     * Read the token that starts at $offset and move $offset past it.
+     *
+     * Comments and whitespace in front of the token are skipped. At the end
+     * of the input a token of type AFPToken::TNONE is returned.
+     *
+     * @param string $code
+     * @param int &$offset Position to read from; updated to the position after the token
+     * @return AFPToken
+     * @throws AFPException
+     * @throws AFPUserVisibleException 'unclosedcomment' or 'unrecognisedtoken'
+     */
+    protected static function nextToken( $code, &$offset ) {
+        $matches = [];
+        $start = $offset;
+
+        // Read past comments
+        while ( preg_match( self::COMMENT_START_RE, $code, $matches, 0, $offset ) ) {
+            // Look for the terminator only after the opening marker, so that
+            // the input "/*/" is not mistaken for a complete comment.
+            $commentEnd = strpos( $code, '*/', $offset + strlen( $matches[0] ) );
+            if ( $commentEnd === false ) {
+                throw new AFPUserVisibleException(
+                    'unclosedcomment', $offset, [] );
+            }
+            $offset = $commentEnd + 2;
+        }
+
+        // Spaces
+        $offset += strspn( $code, self::WHITESPACE, $offset );
+        if ( $offset >= strlen( $code ) ) {
+            return new AFPToken( AFPToken::TNONE, '', $start );
+        }
+
+        $chr = $code[$offset];
+
+        // Punctuation
+        if ( isset( self::$punctuation[$chr] ) ) {
+            $offset++;
+            return new AFPToken( self::$punctuation[$chr], $chr, $start );
+        }
+
+        // String literal
+        if ( $chr === '"' || $chr === "'" ) {
+            return self::readStringLiteral( $code, $offset, $start );
+        }
+
+        $matches = [];
+
+        // Operators
+        if ( preg_match( self::OPERATOR_RE, $code, $matches, 0, $offset ) ) {
+            $token = $matches[0];
+            $offset += strlen( $token );
+            return new AFPToken( AFPToken::TOP, $token, $start );
+        }
+
+        // Numbers
+        if ( preg_match( self::RADIX_RE, $code, $matches, 0, $offset ) ) {
+            $token = $matches[0];
+            $input = $matches[1];
+            $baseChar = isset( $matches[2] ) ? $matches[2] : null;
+            // Sometimes the base char gets mixed in with the rest of it because
+            // the regex targets hex, too.
+            // This mostly happens with binary
+            if ( !$baseChar && !empty( self::$bases[ substr( $input, - 1 ) ] ) ) {
+                $baseChar = substr( $input, - 1, 1 );
+                $input = substr( $input, 0, - 1 );
+            }
+
+            $base = $baseChar ? self::$bases[$baseChar] : 10;
+
+            // Check against the appropriate character class for input validation.
+            // When the digits do not fit the base, fall through to the ID check.
+            if ( preg_match( self::$baseCharsRe[$base], $input ) ) {
+                $num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input;
+                $offset += strlen( $token );
+                return ( strpos( $input, '.' ) !== false )
+                    ? new AFPToken( AFPToken::TFLOAT, floatval( $num ), $start )
+                    : new AFPToken( AFPToken::TINT, intval( $num ), $start );
+            }
+        }
+
+        // IDs / Keywords
+
+        if ( preg_match( self::ID_SYMBOL_RE, $code, $matches, 0, $offset ) ) {
+            $token = $matches[0];
+            $offset += strlen( $token );
+            $type = in_array( $token, self::$keywords, true )
+                ? AFPToken::TKEYWORD
+                : AFPToken::TID;
+            return new AFPToken( $type, $token, $start );
+        }
+
+        throw new AFPUserVisibleException(
+            'unrecognisedtoken', $start, [ substr( $code, $start ) ] );
+    }
+
+    /**
+     * Read a quoted string literal whose opening quote is at $offset.
+     *
+     * Recognised escapes are \\, the escaped quote character, \n, \r, \t and
+     * \xHH (two hex digits). Any other backslash sequence is copied as is,
+     * backslash included.
+     *
+     * @param string $code
+     * @param int &$offset Position of the opening quote; updated to the position
+     *   after the closing quote
+     * @param int $start Position recorded in the returned token
+     * @return AFPToken Token of type AFPToken::TSTRING
+     * @throws AFPException
+     * @throws AFPUserVisibleException 'unclosedstring' when the input ends first
+     */
+    protected static function readStringLiteral( $code, &$offset, $start ) {
+        $type = $code[$offset];
+        $offset++;
+        $length = strlen( $code );
+        $token = '';
+        while ( $offset < $length ) {
+            if ( $code[$offset] === $type ) {
+                $offset++;
+                return new AFPToken( AFPToken::TSTRING, $token, $start );
+            }
+
+            // Performance: Use a PHP function (implemented in C)
+            // to scan ahead.
+            $addLength = strcspn( $code, $type . "\\", $offset );
+            if ( $addLength ) {
+                $token .= substr( $code, $offset, $addLength );
+                $offset += $addLength;
+            } elseif ( $code[$offset] == '\\' ) {
+                if ( $offset + 1 >= $length ) {
+                    // A backslash as the very last character has nothing to
+                    // escape and the literal can no longer be closed. Leave
+                    // the loop instead of reading past the end of the input.
+                    break;
+                }
+
+                switch ( $code[$offset + 1] ) {
+                    case '\\':
+                        $token .= '\\';
+                        break;
+                    case $type:
+                        $token .= $type;
+                        break;
+                    case 'n':
+                        $token .= "\n";
+                        break;
+                    case 'r':
+                        $token .= "\r";
+                        break;
+                    case 't':
+                        $token .= "\t";
+                        break;
+                    case 'x':
+                        $hex = substr( $code, $offset + 2, 2 );
+
+                        if ( preg_match( '/^[0-9A-Fa-f]{2}$/', $hex ) ) {
+                            $token .= chr( hexdec( $hex ) );
+                            $offset += 2; # \xXX -- 2 done later
+                        } else {
+                            $token .= 'x';
+                        }
+                        break;
+                    default:
+                        $token .= "\\" . $code[$offset + 1];
+                }
+
+                // Skip the backslash and the character that followed it.
+                $offset += 2;
+
+            } else {
+                $token .= $code[$offset];
+                $offset++;
+            }
+        }
+        throw new AFPUserVisibleException( 'unclosedstring', $offset, [] );
+    }
+}