diff options
Diffstat (limited to 'www/wiki/extensions/AbuseFilter/includes/parser')
10 files changed, 3446 insertions, 0 deletions
// ---- www/wiki/extensions/AbuseFilter/includes/parser/AFPData.php ----

/**
 * A typed value of the filter language: a (type, data) pair, plus the static
 * helpers that implement casts, comparisons and arithmetic on such values.
 */
class AFPData {
	// Datatypes
	const DINT = 'int';
	const DSTRING = 'string';
	const DNULL = 'null';
	const DBOOL = 'bool';
	const DFLOAT = 'float';
	const DLIST = 'list';

	// Translation table mapping shell-style wildcards to PCRE equivalents.
	// Derived from <http://www.php.net/manual/en/function.fnmatch.php#100207>
	// Keys are written in their preg_quote()d form because keywordLike()
	// quotes the pattern before applying this table.
	private static $wildcardMap = [
		'\*' => '.*',
		'\+' => '\+',
		'\-' => '\-',
		'\.' => '\.',
		'\?' => '.',
		'\[' => '[',
		'\[\!' => '[^',
		'\\' => '\\\\',
		'\]' => ']',
	];

	/** @var string One of the D* constants above */
	public $type;
	/** @var mixed Payload; for DLIST this is an array of AFPData */
	public $data;

	/**
	 * @param string $type One of the D* constants
	 * @param mixed $val Payload matching $type
	 */
	public function __construct( $type = self::DNULL, $val = null ) {
		$this->type = $type;
		$this->data = $val;
	}

	/**
	 * Wrap a native PHP value. Arrays are converted recursively into DLIST
	 * values; array keys are discarded.
	 *
	 * @param mixed $var
	 * @return AFPData
	 * @throws AFPException If the PHP type has no filter-language counterpart
	 */
	public static function newFromPHPVar( $var ) {
		if ( is_string( $var ) ) {
			return new AFPData( self::DSTRING, $var );
		} elseif ( is_int( $var ) ) {
			return new AFPData( self::DINT, $var );
		} elseif ( is_float( $var ) ) {
			return new AFPData( self::DFLOAT, $var );
		} elseif ( is_bool( $var ) ) {
			return new AFPData( self::DBOOL, $var );
		} elseif ( is_array( $var ) ) {
			$result = [];
			foreach ( $var as $item ) {
				$result[] = self::newFromPHPVar( $item );
			}

			return new AFPData( self::DLIST, $result );
		} elseif ( is_null( $var ) ) {
			return new AFPData();
		} else {
			throw new AFPException(
				'Data type ' . gettype( $var ) . ' is not supported by AbuseFilter'
			);
		}
	}

	/**
	 * Shallow copy: a new AFPData carrying the same type and data.
	 *
	 * @return AFPData
	 */
	public function dup() {
		return new AFPData( $this->type, $this->data );
	}

	/**
	 * Convert a value to another datatype.
	 *
	 * Lists cast to bool/float/int via their element count and to string by
	 * joining the string form of each element, each followed by a newline.
	 * Any scalar cast to DLIST becomes a one-element list.
	 *
	 * @param AFPData $orig
	 * @param string $target One of the D* constants
	 * @return AFPData
	 * @throws AFPException If $target is not a known datatype
	 */
	public static function castTypes( $orig, $target ) {
		if ( $orig->type == $target ) {
			return $orig->dup();
		}
		if ( $target == self::DNULL ) {
			return new AFPData();
		}

		if ( $orig->type == self::DLIST ) {
			if ( $target == self::DBOOL ) {
				return new AFPData( self::DBOOL, (bool)count( $orig->data ) );
			}
			if ( $target == self::DFLOAT ) {
				return new AFPData( self::DFLOAT, floatval( count( $orig->data ) ) );
			}
			if ( $target == self::DINT ) {
				return new AFPData( self::DINT, intval( count( $orig->data ) ) );
			}
			if ( $target == self::DSTRING ) {
				$s = '';
				foreach ( $orig->data as $item ) {
					$s .= $item->toString() . "\n";
				}

				return new AFPData( self::DSTRING, $s );
			}
		}

		if ( $target == self::DBOOL ) {
			return new AFPData( self::DBOOL, (bool)$orig->data );
		}
		if ( $target == self::DFLOAT ) {
			return new AFPData( self::DFLOAT, floatval( $orig->data ) );
		}
		if ( $target == self::DINT ) {
			return new AFPData( self::DINT, intval( $orig->data ) );
		}
		if ( $target == self::DSTRING ) {
			return new AFPData( self::DSTRING, strval( $orig->data ) );
		}
		if ( $target == self::DLIST ) {
			return new AFPData( self::DLIST, [ $orig ] );
		}

		// Previously this fell off the end and returned null, which only
		// surfaced later as a property access on a non-object in to*().
		throw new AFPException( 'Cannot cast ' . $orig->type . ' to ' . $target . '.' );
	}

	/**
	 * @param AFPData $value
	 * @return AFPData DBOOL holding the negated truth value
	 */
	public static function boolInvert( $value ) {
		return new AFPData( self::DBOOL, !$value->toBool() );
	}

	/**
	 * @param AFPData $base
	 * @param AFPData $exponent
	 * @return AFPData DINT when PHP's pow() yields an int, DFLOAT otherwise
	 */
	public static function pow( $base, $exponent ) {
		$res = pow( $base->toNumber(), $exponent->toNumber() );
		if ( $res === (int)$res ) {
			return new AFPData( self::DINT, $res );
		} else {
			return new AFPData( self::DFLOAT, $res );
		}
	}

	/**
	 * Substring test: is $a contained in $b? False if either side is empty.
	 *
	 * @param AFPData $a Needle
	 * @param AFPData $b Haystack
	 * @return AFPData DBOOL
	 */
	public static function keywordIn( $a, $b ) {
		$a = $a->toString();
		$b = $b->toString();

		if ( $a == '' || $b == '' ) {
			return new AFPData( self::DBOOL, false );
		}

		return new AFPData( self::DBOOL, strpos( $b, $a ) !== false );
	}

	/**
	 * Substring test: does $a contain $b? False if either side is empty.
	 *
	 * @param AFPData $a Haystack
	 * @param AFPData $b Needle
	 * @return AFPData DBOOL
	 */
	public static function keywordContains( $a, $b ) {
		$a = $a->toString();
		$b = $b->toString();

		if ( $a == '' || $b == '' ) {
			return new AFPData( self::DBOOL, false );
		}

		return new AFPData( self::DBOOL, strpos( $a, $b ) !== false );
	}

	/**
	 * Whether any element of $list loosely equals $value (see equals()).
	 *
	 * @param AFPData $value
	 * @param AFPData $list Its data is iterated, so it is expected to be a DLIST
	 * @return bool
	 */
	public static function listContains( $value, $list ) {
		// Should use built-in PHP function somehow
		foreach ( $list->data as $item ) {
			if ( self::equals( $value, $item ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Compare two values. Non-list values are compared through their string
	 * forms; lists are compared element by element; a list never equals a
	 * non-list.
	 *
	 * @ToDo Should we also build a proper system to compare arrays with different types?
	 * @param AFPData $d1
	 * @param AFPData $d2
	 * @param bool $strict whether to also check types
	 * @return bool
	 */
	public static function equals( $d1, $d2, $strict = false ) {
		if ( $d1->type != self::DLIST && $d2->type != self::DLIST ) {
			$typecheck = $d1->type == $d2->type || !$strict;
			return $typecheck && $d1->toString() === $d2->toString();
		} elseif ( $d1->type == self::DLIST && $d2->type == self::DLIST ) {
			$data1 = $d1->data;
			$data2 = $d2->data;
			if ( count( $data1 ) !== count( $data2 ) ) {
				return false;
			}
			$length = count( $data1 );
			for ( $i = 0; $i < $length; $i++ ) {
				$result = self::equals( $data1[$i], $data2[$i], $strict );
				if ( $result === false ) {
					return false;
				}
			}
			return true;
		} else {
			// Trying to compare an array to something else
			return false;
		}
	}

	/**
	 * Shell-style wildcard match of $str against $pattern.
	 *
	 * @param AFPData $str
	 * @param AFPData $pattern
	 * @return AFPData DBOOL; a preg_match() failure counts as no match
	 */
	public static function keywordLike( $str, $pattern ) {
		$str = $str->toString();
		// Quote everything, then translate the quoted wildcards back into PCRE.
		$pattern = '#^' . strtr( preg_quote( $pattern->toString(), '#' ), self::$wildcardMap ) . '$#u';
		Wikimedia\suppressWarnings();
		$result = preg_match( $pattern, $str );
		Wikimedia\restoreWarnings();

		return new AFPData( self::DBOOL, (bool)$result );
	}

	/**
	 * Regular-expression match of $str against $regex.
	 *
	 * @param AFPData $str
	 * @param AFPData $regex
	 * @param int $pos Position reported to the user on failure
	 * @param bool $insensitive Whether to add the PCRE "i" modifier
	 * @return AFPData DBOOL
	 * @throws AFPUserVisibleException If preg_match() reports an error
	 */
	public static function keywordRegex( $str, $regex, $pos, $insensitive = false ) {
		$str = $str->toString();
		$pattern = $regex->toString();

		// Escape the "/" delimiter inside the user's pattern.
		$pattern = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $pattern );
		$pattern = "/$pattern/u";

		if ( $insensitive ) {
			$pattern .= 'i';
		}

		Wikimedia\suppressWarnings();
		$result = preg_match( $pattern, $str );
		Wikimedia\restoreWarnings();
		if ( $result === false ) {
			throw new AFPUserVisibleException(
				'regexfailure',
				$pos,
				[ 'unspecified error in preg_match()', $pattern ]
			);
		}

		return new AFPData( self::DBOOL, (bool)$result );
	}

	/**
	 * Case-insensitive variant of keywordRegex().
	 *
	 * @param AFPData $str
	 * @param AFPData $regex
	 * @param int $pos
	 * @return AFPData
	 * @throws AFPUserVisibleException
	 */
	public static function keywordRegexInsensitive( $str, $regex, $pos ) {
		return self::keywordRegex( $str, $regex, $pos, true );
	}

	/**
	 * Arithmetic negation. The result keeps the operand's declared type; any
	 * non-DINT operand is negated through its float form.
	 *
	 * @param AFPData $data
	 * @return AFPData
	 */
	public static function unaryMinus( $data ) {
		if ( $data->type == self::DINT ) {
			return new AFPData( $data->type, -$data->toInt() );
		} else {
			return new AFPData( $data->type, -$data->toFloat() );
		}
	}

	/**
	 * @param AFPData $a
	 * @param AFPData $b
	 * @param string $op One of '|', '&', '^'
	 * @return AFPData DBOOL
	 * @throws AFPException On an unknown operator
	 */
	public static function boolOp( $a, $b, $op ) {
		$a = $a->toBool();
		$b = $b->toBool();
		if ( $op == '|' ) {
			return new AFPData( self::DBOOL, $a || $b );
		}
		if ( $op == '&' ) {
			return new AFPData( self::DBOOL, $a && $b );
		}
		if ( $op == '^' ) {
			return new AFPData( self::DBOOL, $a xor $b );
		}
		throw new AFPException( "Invalid boolean operation: {$op}" ); // Should never happen.
	}

	/**
	 * Equality operators go through equals(); ordering operators compare the
	 * operands' string forms with PHP's native comparison.
	 *
	 * @param AFPData $a
	 * @param AFPData $b
	 * @param string $op
	 * @return AFPData DBOOL
	 * @throws AFPException On an unknown operator
	 */
	public static function compareOp( $a, $b, $op ) {
		if ( $op == '==' || $op == '=' ) {
			return new AFPData( self::DBOOL, self::equals( $a, $b ) );
		}
		if ( $op == '!=' ) {
			return new AFPData( self::DBOOL, !self::equals( $a, $b ) );
		}
		if ( $op == '===' ) {
			return new AFPData( self::DBOOL, self::equals( $a, $b, true ) );
		}
		if ( $op == '!==' ) {
			return new AFPData( self::DBOOL, !self::equals( $a, $b, true ) );
		}
		$a = $a->toString();
		$b = $b->toString();
		if ( $op == '>' ) {
			return new AFPData( self::DBOOL, $a > $b );
		}
		if ( $op == '<' ) {
			return new AFPData( self::DBOOL, $a < $b );
		}
		if ( $op == '>=' ) {
			return new AFPData( self::DBOOL, $a >= $b );
		}
		if ( $op == '<=' ) {
			return new AFPData( self::DBOOL, $a <= $b );
		}
		throw new AFPException( "Invalid comparison operation: {$op}" ); // Should never happen
	}

	/**
	 * Multiplication, division and modulo.
	 *
	 * @param AFPData $a
	 * @param AFPData $b
	 * @param string $op One of '*', '/', '%'
	 * @param int $pos Position reported to the user on division by zero
	 * @return AFPData DINT when the result is an int, DFLOAT otherwise
	 * @throws AFPUserVisibleException On division or modulo by zero
	 * @throws AFPException On an unknown operator
	 */
	public static function mulRel( $a, $b, $op, $pos ) {
		$a = $a->toNumber();
		$b = $b->toNumber();

		// PHP's % converts both operands to int first, so a divisor such as
		// 0.5 is effectively zero there even though it is != 0 as a float.
		// Test the value the operator will really divide by.
		$divisor = $op == '%' ? (int)$b : $b;
		if ( $op != '*' && $divisor == 0 ) {
			throw new AFPUserVisibleException( 'dividebyzero', $pos, [ $a ] );
		}

		if ( $op == '*' ) {
			$data = $a * $b;
		} elseif ( $op == '/' ) {
			$data = $a / $b;
		} elseif ( $op == '%' ) {
			$data = $a % $b;
		} else {
			// Should never happen
			throw new AFPException( "Invalid multiplication-related operation: {$op}" );
		}

		if ( $data === (int)$data ) {
			$data = intval( $data );
			$type = self::DINT;
		} else {
			$data = floatval( $data );
			$type = self::DFLOAT;
		}

		return new AFPData( $type, $data );
	}

	/**
	 * The "+" operator: string concatenation if either side is a string,
	 * list concatenation if both are lists, numeric addition otherwise.
	 *
	 * @param AFPData $a
	 * @param AFPData $b
	 * @return AFPData
	 */
	public static function sum( $a, $b ) {
		if ( $a->type == self::DSTRING || $b->type == self::DSTRING ) {
			return new AFPData( self::DSTRING, $a->toString() . $b->toString() );
		} elseif ( $a->type == self::DLIST && $b->type == self::DLIST ) {
			return new AFPData( self::DLIST, array_merge( $a->toList(), $b->toList() ) );
		} else {
			$res = $a->toNumber() + $b->toNumber();
			if ( $res === (int)$res ) {
				return new AFPData( self::DINT, $res );
			} else {
				return new AFPData( self::DFLOAT, $res );
			}
		}
	}

	/**
	 * Numeric subtraction.
	 *
	 * @param AFPData $a
	 * @param AFPData $b
	 * @return AFPData DINT when the result is an int, DFLOAT otherwise
	 */
	public static function sub( $a, $b ) {
		$res = $a->toNumber() - $b->toNumber();
		if ( $res === (int)$res ) {
			return new AFPData( self::DINT, $res );
		} else {
			return new AFPData( self::DFLOAT, $res );
		}
	}

	/** Convert shorteners */

	/**
	 * Unwrap into a native PHP value; lists are unwrapped recursively.
	 *
	 * @throws MWException If the type is not one of the D* constants
	 * @return mixed
	 */
	public function toNative() {
		switch ( $this->type ) {
			case self::DBOOL:
				return $this->toBool();
			case self::DSTRING:
				return $this->toString();
			case self::DFLOAT:
				return $this->toFloat();
			case self::DINT:
				return $this->toInt();
			case self::DLIST:
				$input = $this->toList();
				$output = [];
				foreach ( $input as $item ) {
					$output[] = $item->toNative();
				}

				return $output;
			case self::DNULL:
				return null;
			default:
				throw new MWException( "Unknown type" );
		}
	}

	/**
	 * @return bool
	 */
	public function toBool() {
		return self::castTypes( $this, self::DBOOL )->data;
	}

	/**
	 * @return string
	 */
	public function toString() {
		return self::castTypes( $this, self::DSTRING )->data;
	}

	/**
	 * @return float
	 */
	public function toFloat() {
		return self::castTypes( $this, self::DFLOAT )->data;
	}

	/**
	 * @return int
	 */
	public function toInt() {
		return self::castTypes( $this, self::DINT )->data;
	}

	/**
	 * @return int|float Int for DINT values, float for everything else
	 */
	public function toNumber() {
		return $this->type == self::DINT ? $this->toInt() : $this->toFloat();
	}

	/**
	 * @return AFPData[]
	 */
	public function toList() {
		return self::castTypes( $this, self::DLIST )->data;
	}
}

// ---- www/wiki/extensions/AbuseFilter/includes/parser/AFPException.php ----

/**
 * Base class for exceptions raised by the filter parser.
 */
class AFPException extends MWException {
}

// ---- www/wiki/extensions/AbuseFilter/includes/parser/AFPParserState.php ----

/**
 * Snapshot of a parser's current token and position, used to roll back.
 */
class AFPParserState {
	public $pos, $token;

	/**
	 * @param mixed $token Current token at the time of the snapshot
	 * @param int $pos Parser position at the time of the snapshot
	 */
	public function __construct( $token, $pos ) {
		$this->token = $token;
		$this->pos = $pos;
	}
}

// ---- www/wiki/extensions/AbuseFilter/includes/parser/AFPToken.php ----
// Abuse filter parser.
// Copyright © Victor Vasiliev, 2008.
// Based on ideas by Andrew Garrett
// Distributed under GNU GPL v2 terms.
//
// Types of token:
// * T_NONE - special-purpose token
// * T_BRACE - ( or )
// * T_COMMA - ,
// * T_OP - operator like + or ^
// * T_NUMBER - number
// * T_STRING - string, in "" or ''
// * T_KEYWORD - keyword
// * T_ID - identifier
// * T_STATEMENT_SEPARATOR - ;
// * T_SQUARE_BRACKETS - [ or ]
//
// Levels of parsing:
// * Entry - catches unexpected characters
// * Semicolon - ;
// * Set - :=
// * Conditionls (IF) - if-then-else-end, cond ?
a :b + * * BoolOps (BO) - &, |, ^ + * * CompOps (CO) - ==, !=, ===, !==, >, <, >=, <= + * * SumRel (SR) - +, - + * * MulRel (MR) - *, /, % + * * Pow (P) - ** + * * BoolNeg (BN) - ! operation + * * SpecialOperators (SO) - in and like + * * Unarys (U) - plus and minus in cases like -5 or -(2 * +2) + * * ListElement (LE) - list[number] + * * Braces (B) - ( and ) + * * Functions (F) + * * Atom (A) - return value + */ +class AFPToken { + // Types of tken + const TNONE = 'T_NONE'; + const TID = 'T_ID'; + const TKEYWORD = 'T_KEYWORD'; + const TSTRING = 'T_STRING'; + const TINT = 'T_INT'; + const TFLOAT = 'T_FLOAT'; + const TOP = 'T_OP'; + const TBRACE = 'T_BRACE'; + const TSQUAREBRACKET = 'T_SQUARE_BRACKET'; + const TCOMMA = 'T_COMMA'; + const TSTATEMENTSEPARATOR = 'T_STATEMENT_SEPARATOR'; + + public $type; + public $value; + public $pos; + + public function __construct( $type = self::TNONE, $value = null, $pos = 0 ) { + $this->type = $type; + $this->value = $value; + $this->pos = $pos; + } +} diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php new file mode 100644 index 00000000..e185616c --- /dev/null +++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeNode.php @@ -0,0 +1,126 @@ +<?php +/** + * Represents a node of a parser tree. + */ +class AFPTreeNode { + // Each of the constants below represents a node corresponding to a level + // of the parser, from the top of the tree to the bottom. + + // ENTRY is always one-element and thus does not have its own node. + + // SEMICOLON is a many-children node, denoting that the nodes have to be + // evaluated in order and the last value has to be returned. + const SEMICOLON = 'SEMICOLON'; + + // ASSIGNMENT (formerly known as SET) is a node which is responsible for + // assigning values to variables. 
ASSIGNMENT is a (variable name [string], + // value [tree node]) tuple, INDEX_ASSIGNMENT (which is used to assign + // values at list offsets) is a (variable name [string], index [tree node], + // value [tree node]) tuple, and LIST_APPEND has the form of (variable name + // [string], value [tree node]). + const ASSIGNMENT = 'ASSIGNMENT'; + const INDEX_ASSIGNMENT = 'INDEX_ASSIGNMENT'; + const LIST_APPEND = 'LIST_APPEND'; + + // CONDITIONAL represents both a ternary operator and an if-then-else-end + // construct. The format is (condition, evaluated-if-true, + // evaluated-in-false), all tree nodes. + const CONDITIONAL = 'CONDITIONAL'; + + // LOGIC is a logic operator accepted by AFPData::boolOp. The format is + // (operation, left operand, right operand). + const LOGIC = 'LOGIC'; + + // COMPARE is a comparison operator accepted by AFPData::boolOp. The format is + // (operation, left operand, right operand). + const COMPARE = 'COMPARE'; + + // SUM_REL is either '+' or '-'. The format is (operation, left operand, + // right operand). + const SUM_REL = 'SUM_REL'; + + // MUL_REL is a multiplication-related operation accepted by AFPData::mulRel. + // The format is (operation, left operand, right operand). + const MUL_REL = 'MUL_REL'; + + // POW is an exponentiation operator. The format is (base, exponent). + const POW = 'POW'; + + // BOOL_INVERT is a boolean inversion operator. The format is (operand). + const BOOL_INVERT = 'BOOL_INVERT'; + + // KEYWORD_OPERATOR is one of the binary keyword operators supported by the + // filter language. The format is (keyword, left operand, right operand). + const KEYWORD_OPERATOR = 'KEYWORD_OPERATOR'; + + // UNARY is either unary minus or unary plus. The format is (operator, + // operand). + const UNARY = 'UNARY'; + + // LIST_INDEX is an operation of accessing a list by an offset. The format + // is (list, offset). 
+ const LIST_INDEX = 'LIST_INDEX'; + + // Since parenthesis only manipulate precedence of the operators, they are + // not explicitly represented in the tree. + + // FUNCTION_CALL is an invocation of built-in function. The format is a + // tuple where the first element is a function name, and all subsequent + // elements are the arguments. + const FUNCTION_CALL = 'FUNCTION_CALL'; + + // LIST_DEFINITION is a list literal. The $children field contains tree + // nodes for the values of each of the list element used. + const LIST_DEFINITION = 'LIST_DEFINITION'; + + // ATOM is a node representing a literal. The only element of $children is a + // token corresponding to the literal. + const ATOM = 'ATOM'; + + /** @var string Type of the node, one of the constants above */ + public $type; + /** + * Parameters of the value. Typically it is an array of children nodes, + * which might be either strings (for parametrization of the node) or another + * node. In case of ATOM it's a parser token. + * @var AFPTreeNode[]|string[]|AFPToken + */ + public $children; + + // Position used for error reporting. + public $position; + + public function __construct( $type, $children, $position ) { + $this->type = $type; + $this->children = $children; + $this->position = $position; + } + + public function toDebugString() { + return implode( "\n", $this->toDebugStringInner() ); + } + + private function toDebugStringInner() { + if ( $this->type == self::ATOM ) { + return [ "ATOM({$this->children->type} {$this->children->value})" ]; + } + + $align = function ( $line ) { + return ' ' . 
$line; + }; + + $lines = [ "{$this->type}" ]; + foreach ( $this->children as $subnode ) { + if ( $subnode instanceof AFPTreeNode ) { + $sublines = array_map( $align, $subnode->toDebugStringInner() ); + } elseif ( is_string( $subnode ) ) { + $sublines = [ " {$subnode}" ]; + } else { + throw new AFPException( "Each node parameter has to be either a node or a string" ); + } + + $lines = array_merge( $lines, $sublines ); + } + return $lines; + } +} diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php new file mode 100644 index 00000000..345adcb8 --- /dev/null +++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPTreeParser.php @@ -0,0 +1,611 @@ +<?php + +/** + * A version of the abuse filter parser that separates parsing the filter and + * evaluating it into different passes, allowing the parse tree to be cached. + * + * @file + */ + +/** + * A parser that transforms the text of the filter into a parse tree. + */ +class AFPTreeParser { + // The tokenized representation of the filter parsed. + public $mTokens; + + // Current token handled by the parser and its position. + public $mCur, $mPos; + + const CACHE_VERSION = 2; + + /** + * Create a new instance + */ + public function __construct() { + $this->resetState(); + } + + public function resetState() { + $this->mTokens = []; + $this->mPos = 0; + } + + /** + * Advances the parser to the next token in the filter code. + */ + protected function move() { + list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos]; + } + + /** + * getState() function allows parser state to be rollbacked to several tokens + * back. + * + * @return AFPParserState + */ + protected function getState() { + return new AFPParserState( $this->mCur, $this->mPos ); + } + + /** + * setState() function allows parser state to be rollbacked to several tokens + * back. 
+ * + * @param AFPParserState $state + */ + protected function setState( AFPParserState $state ) { + $this->mCur = $state->token; + $this->mPos = $state->pos; + } + + /** + * Parse the supplied filter source code into a tree. + * + * @param string $code + * @throws AFPUserVisibleException + * @return AFPTreeNode|null + */ + public function parse( $code ) { + $this->mTokens = AbuseFilterTokenizer::tokenize( $code ); + $this->mPos = 0; + + return $this->doLevelEntry(); + } + + /* Levels */ + + /** + * Handles unexpected characters after the expression. + * @return AFPTreeNode|null + * @throws AFPUserVisibleException + */ + protected function doLevelEntry() { + $result = $this->doLevelSemicolon(); + + if ( $this->mCur->type != AFPToken::TNONE ) { + throw new AFPUserVisibleException( + 'unexpectedatend', + $this->mPos, [ $this->mCur->type ] + ); + } + + return $result; + } + + /** + * Handles the semicolon operator. + * + * @return AFPTreeNode|null + */ + protected function doLevelSemicolon() { + $statements = []; + + do { + $this->move(); + $position = $this->mPos; + + if ( $this->mCur->type == AFPToken::TNONE ) { + break; + } + + // Allow empty statements. + if ( $this->mCur->type == AFPToken::TSTATEMENTSEPARATOR ) { + continue; + } + + $statements[] = $this->doLevelSet(); + $position = $this->mPos; + } while ( $this->mCur->type == AFPToken::TSTATEMENTSEPARATOR ); + + // Flatten the tree if possible. + if ( count( $statements ) == 0 ) { + return null; + } elseif ( count( $statements ) == 1 ) { + return $statements[0]; + } else { + return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position ); + } + } + + /** + * Handles variable assignment. + * + * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelSet() { + if ( $this->mCur->type == AFPToken::TID ) { + $varname = $this->mCur->value; + + // Speculatively parse the assignment statement assuming it can + // potentially be an assignment, but roll back if it isn't. 
+ $initialState = $this->getState(); + $this->move(); + + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) { + $position = $this->mPos; + $this->move(); + $value = $this->doLevelSet(); + + return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position ); + } + + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) { + $this->move(); + + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + $index = 'append'; + } else { + // Parse index offset. + $this->setState( $initialState ); + $this->move(); + $index = $this->doLevelSemicolon(); + if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, + [ ']', $this->mCur->type, $this->mCur->value ] ); + } + } + + $this->move(); + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) { + $position = $this->mPos; + $this->move(); + $value = $this->doLevelSet(); + if ( $index === 'append' ) { + return new AFPTreeNode( + AFPTreeNode::LIST_APPEND, [ $varname, $value ], $position ); + } else { + return new AFPTreeNode( + AFPTreeNode::INDEX_ASSIGNMENT, + [ $varname, $index, $value ], + $position + ); + } + } + } + + // If we reached this point, we did not find an assignment. Roll back + // and assume this was just a literal. + $this->setState( $initialState ); + } + + return $this->doLevelConditions(); + } + + /** + * Handles ternary operator and if-then-else-end. 
+ * + * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelConditions() { + if ( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'if' ) { + $position = $this->mPos; + $this->move(); + $condition = $this->doLevelBoolOps(); + + if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'then' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + 'then', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + $valueIfTrue = $this->doLevelConditions(); + + if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'else' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + 'else', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + $valueIfFalse = $this->doLevelConditions(); + + if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'end' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + 'end', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + return new AFPTreeNode( + AFPTreeNode::CONDITIONAL, + [ $condition, $valueIfTrue, $valueIfFalse ], + $position + ); + } + + $condition = $this->doLevelBoolOps(); + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '?' ) { + $position = $this->mPos; + $this->move(); + + $valueIfTrue = $this->doLevelConditions(); + if ( !( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + ':', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + $valueIfFalse = $this->doLevelConditions(); + return new AFPTreeNode( + AFPTreeNode::CONDITIONAL, + [ $condition, $valueIfTrue, $valueIfFalse ], + $position + ); + } + + return $condition; + } + + /** + * Handles logic operators. 
+ * + * @return AFPTreeNode + */ + protected function doLevelBoolOps() { + $leftOperand = $this->doLevelCompares(); + $ops = [ '&', '|', '^' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $position = $this->mPos; + $this->move(); + + $rightOperand = $this->doLevelCompares(); + + $leftOperand = new AFPTreeNode( + AFPTreeNode::LOGIC, + [ $op, $leftOperand, $rightOperand ], + $position + ); + } + return $leftOperand; + } + + /** + * Handles comparison operators. + * + * @return AFPTreeNode + */ + protected function doLevelCompares() { + $leftOperand = $this->doLevelSumRels(); + $ops = [ '==', '===', '!=', '!==', '<', '>', '<=', '>=', '=' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $position = $this->mPos; + $this->move(); + $rightOperand = $this->doLevelSumRels(); + $leftOperand = new AFPTreeNode( + AFPTreeNode::COMPARE, + [ $op, $leftOperand, $rightOperand ], + $position + ); + } + return $leftOperand; + } + + /** + * Handle addition and subtraction. + * + * @return AFPTreeNode + */ + protected function doLevelSumRels() { + $leftOperand = $this->doLevelMulRels(); + $ops = [ '+', '-' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $position = $this->mPos; + $this->move(); + $rightOperand = $this->doLevelMulRels(); + $leftOperand = new AFPTreeNode( + AFPTreeNode::SUM_REL, + [ $op, $leftOperand, $rightOperand ], + $position + ); + } + return $leftOperand; + } + + /** + * Handles multiplication and division. 
+ * + * @return AFPTreeNode + */ + protected function doLevelMulRels() { + $leftOperand = $this->doLevelPow(); + $ops = [ '*', '/', '%' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $position = $this->mPos; + $this->move(); + $rightOperand = $this->doLevelPow(); + $leftOperand = new AFPTreeNode( + AFPTreeNode::MUL_REL, + [ $op, $leftOperand, $rightOperand ], + $position + ); + } + return $leftOperand; + } + + /** + * Handles exponentiation. + * + * @return AFPTreeNode + */ + protected function doLevelPow() { + $base = $this->doLevelBoolInvert(); + while ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '**' ) { + $position = $this->mPos; + $this->move(); + $exponent = $this->doLevelBoolInvert(); + $base = new AFPTreeNode( AFPTreeNode::POW, [ $base, $exponent ], $position ); + } + return $base; + } + + /** + * Handles boolean inversion. + * + * @return AFPTreeNode + */ + protected function doLevelBoolInvert() { + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '!' ) { + $position = $this->mPos; + $this->move(); + $argument = $this->doLevelKeywordOperators(); + return new AFPTreeNode( AFPTreeNode::BOOL_INVERT, [ $argument ], $position ); + } + + return $this->doLevelKeywordOperators(); + } + + /** + * Handles keyword operators. + * + * @return AFPTreeNode + */ + protected function doLevelKeywordOperators() { + $leftOperand = $this->doLevelUnarys(); + $keyword = strtolower( $this->mCur->value ); + if ( $this->mCur->type == AFPToken::TKEYWORD && + in_array( $keyword, array_keys( AbuseFilterParser::$mKeywords ) ) + ) { + $position = $this->mPos; + $this->move(); + $rightOperand = $this->doLevelUnarys(); + + return new AFPTreeNode( + AFPTreeNode::KEYWORD_OPERATOR, + [ $keyword, $leftOperand, $rightOperand ], + $position + ); + } + + return $leftOperand; + } + + /** + * Handles unary operators. 
+ * + * @return AFPTreeNode + */ + protected function doLevelUnarys() { + $op = $this->mCur->value; + if ( $this->mCur->type == AFPToken::TOP && ( $op == "+" || $op == "-" ) ) { + $position = $this->mPos; + $this->move(); + $argument = $this->doLevelListElements(); + return new AFPTreeNode( AFPTreeNode::UNARY, [ $op, $argument ], $position ); + } + return $this->doLevelListElements(); + } + + /** + * Handles accessing a list element by an offset. + * + * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelListElements() { + $list = $this->doLevelParenthesis(); + while ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) { + $position = $this->mPos; + $index = $this->doLevelSemicolon(); + $list = new AFPTreeNode( AFPTreeNode::LIST_INDEX, [ $list, $index ], $position ); + + if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', $this->mPos, + [ ']', $this->mCur->type, $this->mCur->value ] ); + } + $this->move(); + } + + return $list; + } + + /** + * Handles parenthesis. + * + * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelParenthesis() { + if ( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) { + $result = $this->doLevelSemicolon(); + + if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) { + throw new AFPUserVisibleException( + 'expectednotfound', + $this->mPos, + [ ')', $this->mCur->type, $this->mCur->value ] + ); + } + $this->move(); + + return $result; + } + + return $this->doLevelFunction(); + } + + /** + * Handles function calls. 
+ * + * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelFunction() { + if ( $this->mCur->type == AFPToken::TID && + isset( AbuseFilterParser::$mFunctions[$this->mCur->value] ) + ) { + $func = $this->mCur->value; + $position = $this->mPos; + $this->move(); + if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != '(' ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + '(', + $this->mCur->type, + $this->mCur->value + ] + ); + } + + $args = []; + do { + $args[] = $this->doLevelSemicolon(); + } while ( $this->mCur->type == AFPToken::TCOMMA ); + + if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != ')' ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mPos, + [ + ')', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + array_unshift( $args, $func ); + return new AFPTreeNode( AFPTreeNode::FUNCTION_CALL, $args, $position ); + } + + return $this->doLevelAtom(); + } + + /** + * Handle literals. 
+ * @return AFPTreeNode + * @throws AFPUserVisibleException + */ + protected function doLevelAtom() { + $tok = $this->mCur->value; + switch ( $this->mCur->type ) { + case AFPToken::TID: + case AFPToken::TSTRING: + case AFPToken::TFLOAT: + case AFPToken::TINT: + $result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos ); + break; + case AFPToken::TKEYWORD: + if ( in_array( $tok, [ "true", "false", "null" ] ) ) { + $result = new AFPTreeNode( AFPTreeNode::ATOM, $this->mCur, $this->mPos ); + break; + } + + throw new AFPUserVisibleException( + 'unrecognisedkeyword', + $this->mPos, + [ $tok ] + ); + /** @noinspection PhpMissingBreakStatementInspection */ + case AFPToken::TSQUAREBRACKET: + if ( $this->mCur->value == '[' ) { + $list = []; + while ( true ) { + $this->move(); + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + break; + } + + $list[] = $this->doLevelSet(); + + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + break; + } + if ( $this->mCur->type != AFPToken::TCOMMA ) { + throw new AFPUserVisibleException( + 'expectednotfound', + $this->mPos, + [ ', or ]', $this->mCur->type, $this->mCur->value ] + ); + } + } + + $result = new AFPTreeNode( AFPTreeNode::LIST_DEFINITION, $list, $this->mPos ); + break; + } + + // Fallthrough expected + default: + throw new AFPUserVisibleException( + 'unexpectedtoken', + $this->mPos, + [ + $this->mCur->type, + $this->mCur->value + ] + ); + } + + $this->move(); + return $result; + } +} diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php b/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php new file mode 100644 index 00000000..b6e89d03 --- /dev/null +++ b/www/wiki/extensions/AbuseFilter/includes/parser/AFPUserVisibleException.php @@ -0,0 +1,40 @@ +<?php + +// Exceptions that we might conceivably want to report to ordinary users +// (i.e. 
// exceptions that don't represent bugs in the extension itself)
class AFPUserVisibleException extends AFPException {
	/**
	 * Suffix of the "abusefilter-exception-*" message key describing the error.
	 *
	 * The property name is spelled with a capital "ID" because that is the
	 * spelling the constructor and getMessageObj() use; PHP property names are
	 * case-sensitive, so a differently-cased declaration would stay null while
	 * the real value lived in an undeclared dynamic property.
	 *
	 * @var string
	 */
	public $mExceptionID;

	/** @var int Position in the filter code that the error refers to */
	public $mPosition;

	/** @var array Extra message parameters, appended after the position */
	public $mParams;

	/**
	 * @param string $exception_id
	 * @param int $position
	 * @param array $params
	 */
	public function __construct( $exception_id, $position, $params ) {
		$this->mExceptionID = $exception_id;
		$this->mPosition = $position;
		$this->mParams = $params;

		// Exception message text for logs should be in English.
		$msg = $this->getMessageObj()->inLanguage( 'en' )->useDatabase( false )->text();
		parent::__construct( $msg );
	}

	/**
	 * Builds the message for this exception: key
	 * "abusefilter-exception-<id>", with the position as the first parameter
	 * followed by the extra parameters given to the constructor.
	 *
	 * @return mixed Whatever wfMessage() returns (a message object).
	 */
	public function getMessageObj() {
		// Give grep a chance to find the usages:
		// abusefilter-exception-unexpectedatend, abusefilter-exception-expectednotfound
		// abusefilter-exception-unrecognisedkeyword, abusefilter-exception-unexpectedtoken
		// abusefilter-exception-unclosedstring, abusefilter-exception-invalidoperator
		// abusefilter-exception-unrecognisedtoken, abusefilter-exception-noparams
		// abusefilter-exception-dividebyzero, abusefilter-exception-unrecognisedvar
		// abusefilter-exception-notenoughargs, abusefilter-exception-regexfailure
		// abusefilter-exception-overridebuiltin, abusefilter-exception-outofbounds
		// abusefilter-exception-notlist, abusefilter-exception-unclosedcomment
		return wfMessage(
			'abusefilter-exception-' . $this->mExceptionID,
			array_merge( [ $this->mPosition ], $this->mParams )
		);
	}
}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
new file mode 100644
index 00000000..37384356
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
@@ -0,0 +1,279 @@
<?php
/**
 * AbuseFilterCachingParser is the version of AbuseFilterParser which parses
 * the code into an abstract syntax tree before evaluating it, and caches that
 * tree.
+ * + * It currently inherits AbuseFilterParser in order to avoid code duplication. + * In future, this code will replace current AbuseFilterParser entirely. + */ +class AbuseFilterCachingParser extends AbuseFilterParser { + /** + * Return the generated version of the parser for cache invalidation + * purposes. Automatically tracks list of all functions and invalidates the + * cache if it is changed. + * @return string + */ + public static function getCacheVersion() { + static $version = null; + if ( $version !== null ) { + return $version; + } + + $versionKey = [ + AFPTreeParser::CACHE_VERSION, + AbuseFilterTokenizer::CACHE_VERSION, + array_keys( AbuseFilterParser::$mFunctions ), + array_keys( AbuseFilterParser::$mKeywords ), + ]; + $version = hash( 'sha256', serialize( $versionKey ) ); + + return $version; + } + + public function resetState() { + $this->mVars = new AbuseFilterVariableHolder; + $this->mCur = new AFPToken(); + } + + public function intEval( $code ) { + static $cache = null; + if ( !$cache ) { + $cache = ObjectCache::getLocalServerInstance( 'hash' ); + } + + $tree = $cache->getWithSetCallback( + $cache->makeGlobalKey( + __CLASS__, + self::getCacheVersion(), + hash( 'sha256', $code ) + ), + $cache::TTL_DAY, + function () use ( $code ) { + $parser = new AFPTreeParser(); + return $parser->parse( $code ) ?: false; + } + ); + + return $tree + ? $this->evalNode( $tree ) + : new AFPData( AFPData::DNULL, null ); + } + + /** + * Evaluate the value of the specified AST node. + * + * @param AFPTreeNode $node The node to evaluate. + * @return AFPData + * @throws AFPException + * @throws AFPUserVisibleException + * @throws MWException + */ + public function evalNode( AFPTreeNode $node ) { + // A lot of AbuseFilterParser features rely on $this->mCur->pos or + // $this->mPos for error reporting. + // FIXME: this is a hack which needs to be removed when the parsers are + // merged. 
+ $this->mPos = $node->position; + $this->mCur->pos = $node->position; + + switch ( $node->type ) { + case AFPTreeNode::ATOM: + $tok = $node->children; + switch ( $tok->type ) { + case AFPToken::TID: + return $this->getVarValue( strtolower( $tok->value ) ); + case AFPToken::TSTRING: + return new AFPData( AFPData::DSTRING, $tok->value ); + case AFPToken::TFLOAT: + return new AFPData( AFPData::DFLOAT, $tok->value ); + case AFPToken::TINT: + return new AFPData( AFPData::DINT, $tok->value ); + /** @noinspection PhpMissingBreakStatementInspection */ + case AFPToken::TKEYWORD: + switch ( $tok->value ) { + case "true": + return new AFPData( AFPData::DBOOL, true ); + case "false": + return new AFPData( AFPData::DBOOL, false ); + case "null": + return new AFPData(); + } + // Fallthrough intended + default: + throw new AFPException( "Unknown token provided in the ATOM node" ); + } + case AFPTreeNode::LIST_DEFINITION: + $items = array_map( [ $this, 'evalNode' ], $node->children ); + return new AFPData( AFPData::DLIST, $items ); + + case AFPTreeNode::FUNCTION_CALL: + $functionName = $node->children[0]; + $args = array_slice( $node->children, 1 ); + + $func = self::$mFunctions[$functionName]; + $dataArgs = array_map( [ $this, 'evalNode' ], $args ); + + /** @noinspection PhpToStringImplementationInspection */ + $funcHash = md5( $func . 
serialize( $dataArgs ) ); + + if ( isset( self::$funcCache[$funcHash] ) && + !in_array( $func, self::$ActiveFunctions ) + ) { + $result = self::$funcCache[$funcHash]; + } else { + AbuseFilter::triggerLimiter(); + $result = self::$funcCache[$funcHash] = $this->$func( $dataArgs ); + } + + if ( count( self::$funcCache ) > 1000 ) { + self::$funcCache = []; + } + + return $result; + + case AFPTreeNode::LIST_INDEX: + list( $list, $offset ) = $node->children; + + $list = $this->evalNode( $list ); + if ( $list->type != AFPData::DLIST ) { + throw new AFPUserVisibleException( 'notlist', $node->position, [] ); + } + + $offset = $this->evalNode( $offset )->toInt(); + + $list = $list->toList(); + if ( count( $list ) <= $offset ) { + throw new AFPUserVisibleException( 'outofbounds', $node->position, + [ $offset, count( $list ) ] ); + } + + return $list[$offset]; + + case AFPTreeNode::UNARY: + list( $operation, $argument ) = $node->children; + $argument = $this->evalNode( $argument ); + if ( $operation == '-' ) { + return AFPData::unaryMinus( $argument ); + } + return $argument; + + case AFPTreeNode::KEYWORD_OPERATOR: + list( $keyword, $leftOperand, $rightOperand ) = $node->children; + $func = self::$mKeywords[$keyword]; + $leftOperand = $this->evalNode( $leftOperand ); + $rightOperand = $this->evalNode( $rightOperand ); + + AbuseFilter::triggerLimiter(); + $result = AFPData::$func( $leftOperand, $rightOperand, $node->position ); + + return $result; + case AFPTreeNode::BOOL_INVERT: + list( $argument ) = $node->children; + $argument = $this->evalNode( $argument ); + return AFPData::boolInvert( $argument ); + + case AFPTreeNode::POW: + list( $base, $exponent ) = $node->children; + $base = $this->evalNode( $base ); + $exponent = $this->evalNode( $exponent ); + return AFPData::pow( $base, $exponent ); + + case AFPTreeNode::MUL_REL: + list( $op, $leftOperand, $rightOperand ) = $node->children; + $leftOperand = $this->evalNode( $leftOperand ); + $rightOperand = $this->evalNode( 
$rightOperand ); + return AFPData::mulRel( $leftOperand, $rightOperand, $op, /* FIXME */ + 0 ); + + case AFPTreeNode::SUM_REL: + list( $op, $leftOperand, $rightOperand ) = $node->children; + $leftOperand = $this->evalNode( $leftOperand ); + $rightOperand = $this->evalNode( $rightOperand ); + switch ( $op ) { + case '+': + return AFPData::sum( $leftOperand, $rightOperand ); + case '-': + return AFPData::sub( $leftOperand, $rightOperand ); + default: + throw new AFPException( "Unknown sum-related operator: {$op}" ); + } + + case AFPTreeNode::COMPARE: + list( $op, $leftOperand, $rightOperand ) = $node->children; + $leftOperand = $this->evalNode( $leftOperand ); + $rightOperand = $this->evalNode( $rightOperand ); + AbuseFilter::triggerLimiter(); + return AFPData::compareOp( $leftOperand, $rightOperand, $op ); + + case AFPTreeNode::LOGIC: + list( $op, $leftOperand, $rightOperand ) = $node->children; + $leftOperand = $this->evalNode( $leftOperand ); + $value = $leftOperand->toBool(); + // Short-circuit. 
+ if ( ( !$value && $op == '&' ) || ( $value && $op == '|' ) ) { + return $leftOperand; + } + $rightOperand = $this->evalNode( $rightOperand ); + return AFPData::boolOp( $leftOperand, $rightOperand, $op ); + + case AFPTreeNode::CONDITIONAL: + list( $condition, $valueIfTrue, $valueIfFalse ) = $node->children; + $condition = $this->evalNode( $condition ); + if ( $condition->toBool() ) { + return $this->evalNode( $valueIfTrue ); + } else { + return $this->evalNode( $valueIfFalse ); + } + + case AFPTreeNode::ASSIGNMENT: + list( $varName, $value ) = $node->children; + $value = $this->evalNode( $value ); + $this->setUserVariable( $varName, $value ); + return $value; + + case AFPTreeNode::INDEX_ASSIGNMENT: + list( $varName, $offset, $value ) = $node->children; + + $list = $this->mVars->getVar( $varName ); + if ( $list->type != AFPData::DLIST ) { + throw new AFPUserVisibleException( 'notlist', $node->position, [] ); + } + + $offset = $this->evalNode( $offset )->toInt(); + + $list = $list->toList(); + if ( count( $list ) <= $offset ) { + throw new AFPUserVisibleException( 'outofbounds', $node->position, + [ $offset, count( $list ) ] ); + } + + $list[$offset] = $this->evalNode( $value ); + $this->setUserVariable( $varName, new AFPData( AFPData::DLIST, $list ) ); + return $value; + + case AFPTreeNode::LIST_APPEND: + list( $varName, $value ) = $node->children; + + $list = $this->mVars->getVar( $varName ); + if ( $list->type != AFPData::DLIST ) { + throw new AFPUserVisibleException( 'notlist', $node->position, [] ); + } + + $list = $list->toList(); + $list[] = $this->evalNode( $value ); + $this->setUserVariable( $varName, new AFPData( AFPData::DLIST, $list ) ); + return $value; + + case AFPTreeNode::SEMICOLON: + $lastValue = null; + foreach ( $node->children as $statement ) { + $lastValue = $this->evalNode( $statement ); + } + + return $lastValue; + default: + throw new AFPException( "Unknown node type passed: {$node->type}" ); + } + } +} diff --git 
a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php new file mode 100644 index 00000000..50f8dddc --- /dev/null +++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php @@ -0,0 +1,1560 @@ +<?php + +use Wikimedia\Equivset\Equivset; + +class AbuseFilterParser { + public $mCode, $mTokens, $mPos, $mCur, $mShortCircuit, $mAllowShort, $mLen; + + /** + * @var AbuseFilterVariableHolder + */ + public $mVars; + + // length,lcase,ucase,ccnorm,rmdoubles,specialratio,rmspecials,norm,count,get_matches + public static $mFunctions = [ + 'lcase' => 'funcLc', + 'ucase' => 'funcUc', + 'length' => 'funcLen', + 'string' => 'castString', + 'int' => 'castInt', + 'float' => 'castFloat', + 'bool' => 'castBool', + 'norm' => 'funcNorm', + 'ccnorm' => 'funcCCNorm', + 'ccnorm_contains_any' => 'funcCCNormContainsAny', + 'ccnorm_contains_all' => 'funcCCNormContainsAll', + 'specialratio' => 'funcSpecialRatio', + 'rmspecials' => 'funcRMSpecials', + 'rmdoubles' => 'funcRMDoubles', + 'rmwhitespace' => 'funcRMWhitespace', + 'count' => 'funcCount', + 'rcount' => 'funcRCount', + 'get_matches' => 'funcGetMatches', + 'ip_in_range' => 'funcIPInRange', + 'contains_any' => 'funcContainsAny', + 'contains_all' => 'funcContainsAll', + 'substr' => 'funcSubstr', + 'strlen' => 'funcLen', + 'strpos' => 'funcStrPos', + 'str_replace' => 'funcStrReplace', + 'rescape' => 'funcStrRegexEscape', + 'set' => 'funcSetVar', + 'set_var' => 'funcSetVar', + ]; + + // Functions that affect parser state, and shouldn't be cached. 
+ public static $ActiveFunctions = [ + 'funcSetVar', + ]; + + public static $mKeywords = [ + 'in' => 'keywordIn', + 'like' => 'keywordLike', + 'matches' => 'keywordLike', + 'contains' => 'keywordContains', + 'rlike' => 'keywordRegex', + 'irlike' => 'keywordRegexInsensitive', + 'regex' => 'keywordRegex', + ]; + + public static $funcCache = []; + + /** + * @var Equivset + */ + protected static $equivset; + + /** + * Create a new instance + * + * @param AbuseFilterVariableHolder $vars + */ + public function __construct( $vars = null ) { + $this->resetState(); + if ( $vars instanceof AbuseFilterVariableHolder ) { + $this->mVars = $vars; + } + } + + public function resetState() { + $this->mCode = ''; + $this->mTokens = []; + $this->mVars = new AbuseFilterVariableHolder; + $this->mPos = 0; + $this->mShortCircuit = false; + $this->mAllowShort = true; + } + + /** + * @param string $filter + * @return array|bool + */ + public function checkSyntax( $filter ) { + try { + $origAS = $this->mAllowShort; + $this->mAllowShort = false; + $this->parse( $filter ); + } catch ( AFPUserVisibleException $excep ) { + $this->mAllowShort = $origAS; + + return [ $excep->getMessageObj()->text(), $excep->mPosition ]; + } + $this->mAllowShort = $origAS; + + return true; + } + + /** + * @param string $name + * @param mixed $value + */ + public function setVar( $name, $value ) { + $this->mVars->setVar( $name, $value ); + } + + /** + * @param mixed $vars + */ + public function setVars( $vars ) { + if ( is_array( $vars ) ) { + foreach ( $vars as $name => $var ) { + $this->setVar( $name, $var ); + } + } elseif ( $vars instanceof AbuseFilterVariableHolder ) { + $this->mVars->addHolders( $vars ); + } + } + + /** + * @return AFPToken + */ + protected function move() { + list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos]; + } + + /** + * getState() function allows parser state to be rollbacked to several tokens back + * @return AFPParserState + */ + protected function getState() { + return 
new AFPParserState( $this->mCur, $this->mPos ); + } + + /** + * setState() function allows parser state to be rollbacked to several tokens back + * @param AFPParserState $state + */ + protected function setState( AFPParserState $state ) { + $this->mCur = $state->token; + $this->mPos = $state->pos; + } + + /** + * @return mixed + * @throws AFPUserVisibleException + */ + protected function skipOverBraces() { + if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) || + !$this->mShortCircuit + ) { + return; + } + + $braces = 1; + while ( $this->mCur->type != AFPToken::TNONE && $braces > 0 ) { + $this->move(); + if ( $this->mCur->type == AFPToken::TBRACE ) { + if ( $this->mCur->value == '(' ) { + $braces++; + } elseif ( $this->mCur->value == ')' ) { + $braces--; + } + } + } + if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, [ ')' ] ); + } + } + + /** + * @param string $code + * @return bool + */ + public function parse( $code ) { + return $this->intEval( $code )->toBool(); + } + + /** + * @param string $filter + * @return string + */ + public function evaluateExpression( $filter ) { + return $this->intEval( $filter )->toString(); + } + + /** + * @param string $code + * @return AFPData + */ + public function intEval( $code ) { + // Setup, resetting + $this->mCode = $code; + $this->mTokens = AbuseFilterTokenizer::tokenize( $code ); + $this->mPos = 0; + $this->mLen = strlen( $code ); + $this->mShortCircuit = false; + + $result = new AFPData(); + $this->doLevelEntry( $result ); + + return $result; + } + + /** + * @param string $a + * @param string $b + * @return int + */ + static function lengthCompare( $a, $b ) { + if ( strlen( $a ) == strlen( $b ) ) { + return 0; + } + + return ( strlen( $a ) < strlen( $b ) ) ? 
-1 : 1; + } + + /* Levels */ + + /** + * Handles unexpected characters after the expression + * + * @param AFPData &$result + * @throws AFPUserVisibleException + */ + protected function doLevelEntry( &$result ) { + $this->doLevelSemicolon( $result ); + + if ( $this->mCur->type != AFPToken::TNONE ) { + throw new AFPUserVisibleException( + 'unexpectedatend', + $this->mCur->pos, [ $this->mCur->type ] + ); + } + } + + /** + * Handles multiple expressions + * @param AFPData &$result + */ + protected function doLevelSemicolon( &$result ) { + do { + $this->move(); + if ( $this->mCur->type != AFPToken::TSTATEMENTSEPARATOR ) { + $this->doLevelSet( $result ); + } + } while ( $this->mCur->type == AFPToken::TSTATEMENTSEPARATOR ); + } + + /** + * Handles multiple expressions + * + * @param AFPData &$result + * @throws AFPUserVisibleException + */ + protected function doLevelSet( &$result ) { + if ( $this->mCur->type == AFPToken::TID ) { + $varname = $this->mCur->value; + $prev = $this->getState(); + $this->move(); + + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) { + $this->move(); + $this->doLevelSet( $result ); + $this->setUserVariable( $varname, $result ); + + return; + } elseif ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) { + if ( !$this->mVars->varIsSet( $varname ) ) { + throw new AFPUserVisibleException( 'unrecognisedvar', + $this->mCur->pos, + [ $varname ] + ); + } + $list = $this->mVars->getVar( $varname ); + if ( $list->type != AFPData::DLIST ) { + throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] ); + } + $list = $list->toList(); + $this->move(); + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + $idx = 'new'; + } else { + $this->setState( $prev ); + $this->move(); + $idx = new AFPData(); + $this->doLevelSemicolon( $idx ); + $idx = $idx->toInt(); + if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) { + throw new 
AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, + [ ']', $this->mCur->type, $this->mCur->value ] ); + } + if ( count( $list ) <= $idx ) { + throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos, + [ $idx, count( $result->data ) ] ); + } + } + $this->move(); + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) { + $this->move(); + $this->doLevelSet( $result ); + if ( $idx === 'new' ) { + $list[] = $result; + } else { + $list[$idx] = $result; + } + $this->setUserVariable( $varname, new AFPData( AFPData::DLIST, $list ) ); + + return; + } else { + $this->setState( $prev ); + } + } else { + $this->setState( $prev ); + } + } + $this->doLevelConditions( $result ); + } + + /** + * @param AFPData &$result + * @throws AFPUserVisibleException + */ + protected function doLevelConditions( &$result ) { + if ( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'if' ) { + $this->move(); + $this->doLevelBoolOps( $result ); + + if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'then' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + 'then', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + $r1 = new AFPData(); + $r2 = new AFPData(); + + $isTrue = $result->toBool(); + + if ( !$isTrue ) { + $scOrig = $this->mShortCircuit; + $this->mShortCircuit = $this->mAllowShort; + } + $this->doLevelConditions( $r1 ); + if ( !$isTrue ) { + $this->mShortCircuit = $scOrig; + } + + if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'else' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + 'else', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + if ( $isTrue ) { + $scOrig = $this->mShortCircuit; + $this->mShortCircuit = $this->mAllowShort; + } + $this->doLevelConditions( $r2 ); + if ( $isTrue ) { + $this->mShortCircuit = $scOrig; + } + + if ( !( $this->mCur->type == 
AFPToken::TKEYWORD && $this->mCur->value == 'end' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + 'end', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + if ( $result->toBool() ) { + $result = $r1; + } else { + $result = $r2; + } + } else { + $this->doLevelBoolOps( $result ); + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '?' ) { + $this->move(); + $r1 = new AFPData(); + $r2 = new AFPData(); + + $isTrue = $result->toBool(); + + if ( !$isTrue ) { + $scOrig = $this->mShortCircuit; + $this->mShortCircuit = $this->mAllowShort; + } + $this->doLevelConditions( $r1 ); + if ( !$isTrue ) { + $this->mShortCircuit = $scOrig; + } + + if ( !( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + ':', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + if ( $isTrue ) { + $scOrig = $this->mShortCircuit; + $this->mShortCircuit = $this->mAllowShort; + } + $this->doLevelConditions( $r2 ); + if ( $isTrue ) { + $this->mShortCircuit = $scOrig; + } + + if ( $isTrue ) { + $result = $r1; + } else { + $result = $r2; + } + } + } + } + + /** + * @param AFPData &$result + */ + protected function doLevelBoolOps( &$result ) { + $this->doLevelCompares( $result ); + $ops = [ '&', '|', '^' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $this->move(); + $r2 = new AFPData(); + + // We can go on quickly as either one statement with | is true or on with & is false + if ( ( $op == '&' && !$result->toBool() ) || ( $op == '|' && $result->toBool() ) ) { + $orig = $this->mShortCircuit; + $this->mShortCircuit = $this->mAllowShort; + $this->doLevelCompares( $r2 ); + $this->mShortCircuit = $orig; + $result = new AFPData( AFPData::DBOOL, $result->toBool() ); + continue; + } + + $this->doLevelCompares( $r2 ); + + $result = 
AFPData::boolOp( $result, $r2, $op ); + } + } + + /** + * @param string &$result + */ + protected function doLevelCompares( &$result ) { + $this->doLevelSumRels( $result ); + $ops = [ '==', '===', '!=', '!==', '<', '>', '<=', '>=', '=' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $this->move(); + $r2 = new AFPData(); + $this->doLevelSumRels( $r2 ); + if ( $this->mShortCircuit ) { + break; // The result doesn't matter. + } + AbuseFilter::triggerLimiter(); + $result = AFPData::compareOp( $result, $r2, $op ); + } + } + + /** + * @param string &$result + */ + protected function doLevelSumRels( &$result ) { + $this->doLevelMulRels( $result ); + $ops = [ '+', '-' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $this->move(); + $r2 = new AFPData(); + $this->doLevelMulRels( $r2 ); + if ( $this->mShortCircuit ) { + break; // The result doesn't matter. + } + if ( $op == '+' ) { + $result = AFPData::sum( $result, $r2 ); + } + if ( $op == '-' ) { + $result = AFPData::sub( $result, $r2 ); + } + } + } + + /** + * @param string &$result + */ + protected function doLevelMulRels( &$result ) { + $this->doLevelPow( $result ); + $ops = [ '*', '/', '%' ]; + while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) { + $op = $this->mCur->value; + $this->move(); + $r2 = new AFPData(); + $this->doLevelPow( $r2 ); + if ( $this->mShortCircuit ) { + break; // The result doesn't matter. 
+ } + $result = AFPData::mulRel( $result, $r2, $op, $this->mCur->pos ); + } + } + + /** + * @param string &$result + */ + protected function doLevelPow( &$result ) { + $this->doLevelBoolInvert( $result ); + while ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '**' ) { + $this->move(); + $expanent = new AFPData(); + $this->doLevelBoolInvert( $expanent ); + if ( $this->mShortCircuit ) { + break; // The result doesn't matter. + } + $result = AFPData::pow( $result, $expanent ); + } + } + + /** + * @param string &$result + */ + protected function doLevelBoolInvert( &$result ) { + if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '!' ) { + $this->move(); + $this->doLevelSpecialWords( $result ); + if ( $this->mShortCircuit ) { + return; // The result doesn't matter. + } + $result = AFPData::boolInvert( $result ); + } else { + $this->doLevelSpecialWords( $result ); + } + } + + /** + * @param string &$result + */ + protected function doLevelSpecialWords( &$result ) { + $this->doLevelUnarys( $result ); + $keyword = strtolower( $this->mCur->value ); + if ( $this->mCur->type == AFPToken::TKEYWORD + && in_array( $keyword, array_keys( self::$mKeywords ) ) + ) { + $func = self::$mKeywords[$keyword]; + $this->move(); + $r2 = new AFPData(); + $this->doLevelUnarys( $r2 ); + + if ( $this->mShortCircuit ) { + return; // The result doesn't matter. + } + + AbuseFilter::triggerLimiter(); + + $result = AFPData::$func( $result, $r2, $this->mCur->pos ); + } + } + + /** + * @param string &$result + */ + protected function doLevelUnarys( &$result ) { + $op = $this->mCur->value; + if ( $this->mCur->type == AFPToken::TOP && ( $op == "+" || $op == "-" ) ) { + $this->move(); + $this->doLevelListElements( $result ); + if ( $this->mShortCircuit ) { + return; // The result doesn't matter. 
+ } + if ( $op == '-' ) { + $result = AFPData::unaryMinus( $result ); + } + } else { + $this->doLevelListElements( $result ); + } + } + + /** + * @param string &$result + * @throws AFPUserVisibleException + */ + protected function doLevelListElements( &$result ) { + $this->doLevelBraces( $result ); + while ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) { + $idx = new AFPData(); + $this->doLevelSemicolon( $idx ); + if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) { + throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, + [ ']', $this->mCur->type, $this->mCur->value ] ); + } + $idx = $idx->toInt(); + if ( $result->type == AFPData::DLIST ) { + if ( count( $result->data ) <= $idx ) { + throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos, + [ $idx, count( $result->data ) ] ); + } + $result = $result->data[$idx]; + } else { + throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] ); + } + $this->move(); + } + } + + /** + * @param string &$result + * @throws AFPUserVisibleException + */ + protected function doLevelBraces( &$result ) { + if ( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) { + if ( $this->mShortCircuit ) { + $this->skipOverBraces(); + } else { + $this->doLevelSemicolon( $result ); + } + if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) { + throw new AFPUserVisibleException( + 'expectednotfound', + $this->mCur->pos, + [ ')', $this->mCur->type, $this->mCur->value ] + ); + } + $this->move(); + } else { + $this->doLevelFunction( $result ); + } + } + + /** + * @param string &$result + * @throws AFPUserVisibleException + */ + protected function doLevelFunction( &$result ) { + if ( $this->mCur->type == AFPToken::TID && isset( self::$mFunctions[$this->mCur->value] ) ) { + $func = self::$mFunctions[$this->mCur->value]; + $this->move(); + if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value 
!= '(' ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + '(', + $this->mCur->type, + $this->mCur->value + ] + ); + } + + if ( $this->mShortCircuit ) { + $this->skipOverBraces(); + $this->move(); + + return; // The result doesn't matter. + } + + $args = []; + do { + $r = new AFPData(); + $this->doLevelSemicolon( $r ); + $args[] = $r; + } while ( $this->mCur->type == AFPToken::TCOMMA ); + + if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != ')' ) { + throw new AFPUserVisibleException( 'expectednotfound', + $this->mCur->pos, + [ + ')', + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + + $funcHash = md5( $func . serialize( $args ) ); + + if ( isset( self::$funcCache[$funcHash] ) && + !in_array( $func, self::$ActiveFunctions ) + ) { + $result = self::$funcCache[$funcHash]; + } else { + AbuseFilter::triggerLimiter(); + $result = self::$funcCache[$funcHash] = $this->$func( $args ); + } + + if ( count( self::$funcCache ) > 1000 ) { + self::$funcCache = []; + } + } else { + $this->doLevelAtom( $result ); + } + } + + /** + * @param string &$result + * @throws AFPUserVisibleException + * @return AFPData + */ + protected function doLevelAtom( &$result ) { + $tok = $this->mCur->value; + switch ( $this->mCur->type ) { + case AFPToken::TID: + if ( $this->mShortCircuit ) { + break; + } + $var = strtolower( $tok ); + $result = $this->getVarValue( $var ); + break; + case AFPToken::TSTRING: + $result = new AFPData( AFPData::DSTRING, $tok ); + break; + case AFPToken::TFLOAT: + $result = new AFPData( AFPData::DFLOAT, $tok ); + break; + case AFPToken::TINT: + $result = new AFPData( AFPData::DINT, $tok ); + break; + case AFPToken::TKEYWORD: + if ( $tok == "true" ) { + $result = new AFPData( AFPData::DBOOL, true ); + } elseif ( $tok == "false" ) { + $result = new AFPData( AFPData::DBOOL, false ); + } elseif ( $tok == "null" ) { + $result = new AFPData(); + } else { + throw new AFPUserVisibleException( + 
'unrecognisedkeyword', + $this->mCur->pos, + [ $tok ] + ); + } + break; + case AFPToken::TNONE: + return; // Handled at entry level + case AFPToken::TBRACE: + if ( $this->mCur->value == ')' ) { + return; // Handled at the entry level + } + case AFPToken::TSQUAREBRACKET: + if ( $this->mCur->value == '[' ) { + $list = []; + while ( true ) { + $this->move(); + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + break; + } + $item = new AFPData(); + $this->doLevelSet( $item ); + $list[] = $item; + if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) { + break; + } + if ( $this->mCur->type != AFPToken::TCOMMA ) { + throw new AFPUserVisibleException( + 'expectednotfound', + $this->mCur->pos, + [ ', or ]', $this->mCur->type, $this->mCur->value ] + ); + } + } + $result = new AFPData( AFPData::DLIST, $list ); + break; + } + default: + throw new AFPUserVisibleException( + 'unexpectedtoken', + $this->mCur->pos, + [ + $this->mCur->type, + $this->mCur->value + ] + ); + } + $this->move(); + } + + /* End of levels */ + + /** + * @param string $var + * @return AFPData + * @throws AFPUserVisibleException + */ + protected function getVarValue( $var ) { + $var = strtolower( $var ); + $builderValues = AbuseFilter::getBuilderValues(); + if ( !( array_key_exists( $var, $builderValues['vars'] ) + || $this->mVars->varIsSet( $var ) ) + ) { + // If the variable is invalid, throw an exception + throw new AFPUserVisibleException( + 'unrecognisedvar', + $this->mCur->pos, + [ $var ] + ); + } else { + return $this->mVars->getVar( $var ); + } + } + + /** + * @param string $name + * @param string $value + * @throws AFPUserVisibleException + */ + protected function setUserVariable( $name, $value ) { + $builderValues = AbuseFilter::getBuilderValues(); + if ( array_key_exists( $name, $builderValues['vars'] ) ) { + throw new AFPUserVisibleException( 'overridebuiltin', $this->mCur->pos, [ $name ] ); + } + $this->mVars->setVar( $name, $value 
);
+	}
+
+	// Built-in functions
+
+	/**
+	 * Lowercase the string form of the first argument via $wgContLang->lc().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the lowercased text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcLc( $args ) {
+		global $wgContLang;
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				// Expected count is 1, matching the check above (was reported as 2)
+				[ 'lc', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		return new AFPData( AFPData::DSTRING, $wgContLang->lc( $s ) );
+	}
+
+	/**
+	 * Uppercase the string form of the first argument via $wgContLang->uc().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the uppercased text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcUc( $args ) {
+		global $wgContLang;
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				// Expected count is 1, matching the check above (was reported as 2)
+				[ 'uc', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		return new AFPData( AFPData::DSTRING, $wgContLang->uc( $s ) );
+	}
+
+	/**
+	 * Length of the first argument: the element count for a DLIST, otherwise the
+	 * number of UTF-8 characters in its string form.
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DINT holding the length
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcLen( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				// Expected count is 1, matching the check above (was reported as 2)
+				[ 'len', 1, count( $args ) ]
+			);
+		}
+		if ( $args[0]->type == AFPData::DLIST ) {
+			// Don't use toString on lists, but count
+			return new AFPData( AFPData::DINT, count( $args[0]->data ) );
+		}
+		$s = $args[0]->toString();
+
+		return new AFPData( AFPData::DINT, mb_strlen( $s, 'utf-8' ) );
+	}
+
+	/**
+	 * Remove every run matched by /[\d\W]+/ from the string form of the first
+	 * argument, then lowercase what is left with strtolower().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the reduced text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcSimpleNorm( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				// Expected count is 1, matching the check above (was reported as 2)
+				[ 'simplenorm', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		$s = preg_replace( '/[\d\W]+/', '', $s );
+		$s = strtolower( $s );
+
+		return new AFPData( AFPData::DSTRING, $s );
+	}
+
+	/**
+	 * @param array $args
+	 * @return AFPData
+	 * @throws AFPUserVisibleException
+	 */
+	protected function funcSpecialRatio( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new
AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'specialratio', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		if ( !strlen( $s ) ) {
+			return new AFPData( AFPData::DFLOAT, 0 );
+		}
+
+		$nospecials = $this->rmspecials( $s );
+
+		$val = 1. - ( ( mb_strlen( $nospecials ) / mb_strlen( $s ) ) );
+
+		return new AFPData( AFPData::DFLOAT, $val );
+	}
+
+	/**
+	 * Count list elements, comma-separated fields, or substring occurrences.
+	 *
+	 * - One DLIST argument: the number of list elements.
+	 * - One other argument: the number of comma-separated fields in its string form.
+	 * - Two or more arguments: how often $args[0] occurs in $args[1] (0 for an empty needle).
+	 *
+	 * @param array $args Function arguments as described above
+	 * @return AFPData DINT holding the count
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcCount( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'count', 1, count( $args ) ]
+			);
+		}
+
+		if ( $args[0]->type == AFPData::DLIST && count( $args ) == 1 ) {
+			return new AFPData( AFPData::DINT, count( $args[0]->data ) );
+		}
+
+		if ( count( $args ) == 1 ) {
+			$count = count( explode( ',', $args[0]->toString() ) );
+		} else {
+			$needle = $args[0]->toString();
+			$haystack = $args[1]->toString();
+
+			// T62203: Keep empty parameters from causing PHP warnings
+			if ( $needle === '' ) {
+				$count = 0;
+			} else {
+				$count = substr_count( $haystack, $needle );
+			}
+		}
+
+		return new AFPData( AFPData::DINT, $count );
+	}
+
+	/**
+	 * Regex flavour of funcCount(): with one argument, the number of
+	 * comma-separated fields; otherwise the number of matches of the regex
+	 * $args[0] in $args[1].
+	 *
+	 * @param array $args Function arguments as described above
+	 * @return AFPData DINT holding the count
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given,
+	 *   'regexfailure' when preg_match_all() reports an error
+	 * @throws Exception
+	 */
+	protected function funcRCount( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'rcount', 1, count( $args ) ]
+			);
+		}
+
+		if ( count( $args ) == 1 ) {
+			$count = count( explode( ',', $args[0]->toString() ) );
+		} else {
+			$needle = $args[0]->toString();
+			$haystack = $args[1]->toString();
+
+			# Munge the regex
+			$needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+			$needle = "/$needle/u";
+
+			// The pattern is user-supplied, so a malformed one makes PCRE emit a PHP
+			// warning. Suppress it as funcGetMatches() does; the failure is still
+			// reported through the 'regexfailure' exception below.
+			Wikimedia\suppressWarnings();
+			// Omit the '$matches' argument to avoid computing them, just count.
+			$count = preg_match_all( $needle, $haystack );
+			Wikimedia\restoreWarnings();
+
+			if ( $count === false ) {
+				throw new AFPUserVisibleException(
+					'regexfailure',
+					$this->mCur->pos,
+					[ 'unspecified error in preg_match_all()', $needle ]
+				);
+			}
+		}
+
+		return new AFPData( AFPData::DINT, $count );
+	}
+
+	/**
+	 * Returns an array of matches of needle in the haystack, the first one for the whole regex,
+	 * the other ones for every capturing group.
+	 *
+	 * @param array $args Regex in $args[0], subject string in $args[1]
+	 * @return AFPData A list of matches.
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments,
+	 *   'regexfailure' when preg_match() reports an error
+	 */
+	protected function funcGetMatches( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'get_matches', 2, count( $args ) ]
+			);
+		}
+		$needle = $args[0]->toString();
+		$haystack = $args[1]->toString();
+
+		// Count the amount of capturing groups in the submitted pattern.
+		// This way we can return a fixed-dimension array, much easier to manage.
+		// First, strip away escaped parentheses
+		$sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle );
+		// Then strip starting parentheses of non-capturing groups
+		// (also atomics, lookahead and so on, even if not every of them is supported)
+		$sanitized = preg_replace( '/\(\?/', '', $sanitized );
+		// Finally create an array of falses with dimension = # of capturing groups
+		$groupscount = substr_count( $sanitized, '(' ) + 1;
+		$falsy = array_fill( 0, $groupscount, false );
+
+		// Munge the regex by escaping slashes
+		$needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+		$needle = "/$needle/u";
+
+		// Suppress and restore are here for the same reason as T177744
+		Wikimedia\suppressWarnings();
+		$check = preg_match( $needle, $haystack, $matches );
+		Wikimedia\restoreWarnings();
+
+		if ( $check === false ) {
+			throw new AFPUserVisibleException(
+				'regexfailure',
+				$this->mCur->pos,
+				[ 'unspecified error in preg_match()', $needle ]
+			);
+		}
+
+		// Returned array has non-empty positions identical to the ones returned
+		// by the third parameter of a standard preg_match call ($matches in this case).
+		// We want a union with falsy to return a fixed-dimension array.
+		return AFPData::newFromPHPVar( $matches + $falsy );
+	}
+
+	/**
+	 * Test whether an IP address lies in a range, via IP::isInRange().
+	 *
+	 * @param array $args IP in $args[0], range in $args[1] (both read with toString())
+	 * @return AFPData DBOOL holding the result of IP::isInRange()
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcIPInRange( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'ip_in_range', 2, count( $args ) ]
+			);
+		}
+
+		$ip = $args[0]->toString();
+		$range = $args[1]->toString();
+
+		$result = IP::isInRange( $ip, $range );
+
+		return new AFPData( AFPData::DBOOL, $result );
+	}
+
+	/**
+	 * Decode HTML entities in the first argument, then run it through ccnorm().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the normalized text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcCCNorm( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'ccnorm', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		$s = html_entity_decode( $s, ENT_QUOTES, 'UTF-8' );
+		$s = $this->ccnorm( $s );
+
+		return new AFPData( AFPData::DSTRING, $s );
+	}
+
+	/**
+	 * Search a string for multiple substrings in OR mode, without normalization.
+	 *
+	 * @param array $args Haystack first, then one or more needles
+	 * @return AFPData DBOOL result of contains() with $is_any = true
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcContainsAny( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'contains_any', 2, count( $args ) ]
+			);
+		}
+
+		$s = array_shift( $args );
+
+		return new AFPData( AFPData::DBOOL, self::contains( $s, $args, true ) );
+	}
+
+	/**
+	 * Search a string for multiple substrings in AND mode, without normalization.
+	 *
+	 * @param array $args Haystack first, then one or more needles
+	 * @return AFPData DBOOL result of contains() with $is_any = false
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcContainsAll( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'contains_all', 2, count( $args ) ]
+			);
+		}
+
+		$s = array_shift( $args );
+
+		return new AFPData( AFPData::DBOOL, self::contains( $s, $args, false, false ) );
+	}
+
+	/**
+	 * Normalize and search a string for multiple
substrings in OR mode
+	 *
+	 * @param array $args Haystack first, then one or more needles
+	 * @return AFPData DBOOL result of contains() in ANY mode with normalization on
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcCCNormContainsAny( $args ) {
+		$given = count( $args );
+		if ( $given < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'ccnorm_contains_any', 2, $given ]
+			);
+		}
+
+		$haystack = array_shift( $args );
+		$found = self::contains( $haystack, $args, true, true );
+
+		return new AFPData( AFPData::DBOOL, $found );
+	}
+
+	/**
+	 * Normalize and search a string for multiple substrings in AND mode
+	 *
+	 * @param array $args Haystack first, then one or more needles
+	 * @return AFPData DBOOL result of contains() in ALL mode with normalization on
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcCCNormContainsAll( $args ) {
+		$given = count( $args );
+		if ( $given < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'ccnorm_contains_all', 2, $given ]
+			);
+		}
+
+		$haystack = array_shift( $args );
+		$found = self::contains( $haystack, $args, false, true );
+
+		return new AFPData( AFPData::DBOOL, $found );
+	}
+
+	/**
+	 * Search for substrings in a string
+	 *
+	 * $is_any selects the logic: OR (true) stops at the first needle that is
+	 * found, AND (false) stops at the first needle that is missing.
+	 *
+	 * With $normalize = true both the haystack and every needle are passed
+	 * through ccnorm() before comparing.
+	 *
+	 * An empty haystack always yields false; empty needles are skipped.
+	 *
+	 * @param AFPData $string
+	 * @param AFPData[] $values
+	 * @param bool $is_any
+	 * @param bool $normalize
+	 *
+	 * @return bool
+	 */
+	protected static function contains( $string, $values, $is_any = true, $normalize = false ) {
+		$haystack = $string->toString();
+		if ( $haystack == '' ) {
+			return false;
+		}
+		if ( $normalize ) {
+			$haystack = self::ccnorm( $haystack );
+		}
+
+		foreach ( $values as $value ) {
+			$needle = $value->toString();
+			if ( $normalize ) {
+				$needle = self::ccnorm( $needle );
+			}
+
+			// T62203: Keep empty parameters from causing PHP warnings
+			if ( $needle !== '' ) {
+				$is_found = strpos( $haystack, $needle ) !== false;
+				// ANY mode ends on the first hit, ALL mode on the first miss;
+				// in both cases the outcome equals $is_found.
+				if ( $is_found === $is_any ) {
+					return $is_found;
+				}
+			}
+		}
+
+		// Loop exhausted: ANY mode found nothing (false),
+		// ALL mode found everything (true).
+		return !$is_any;
+	}
+
+	/**
+	 * Normalize a string with the shared Equivset instance.
+	 *
+	 * @param string $s
+	 * @return mixed Whatever Equivset::normalize() returns
+	 */
+	protected static function ccnorm( $s ) {
+		// Instantiate the equivset only once so its data is not loaded
+		// more than once.
+		$equivset = self::$equivset;
+		if ( !$equivset ) {
+			$equivset = new Equivset();
+			self::$equivset = $equivset;
+		}
+
+		return $equivset->normalize( $s );
+	}
+
+	/**
+	 * Drop every character that is not a Unicode letter or number.
+	 *
+	 * @param string $s
+	 * @return array|string
+	 */
+	protected function rmspecials( $s ) {
+		$notLetterOrNumber = '/[^\p{L}\p{N}]/u';
+
+		return preg_replace( $notLetterOrNumber, '', $s );
+	}
+
+	/**
+	 * Collapse each run of a repeated character into a single occurrence.
+	 *
+	 * @param string $s
+	 * @return array|string
+	 */
+	protected function rmdoubles( $s ) {
+		$repeatedChar = '/(.)\1+/us';
+
+		return preg_replace( $repeatedChar, '\1', $s );
+	}
+
+	/**
+	 * Remove all whitespace.
+	 *
+	 * @param string $s
+	 * @return array|string
+	 */
+	protected function rmwhitespace( $s ) {
+		$whitespaceRun = '/\s+/u';
+
+		return preg_replace( $whitespaceRun, '', $s );
+	}
+
+	/**
+	 * Filter function wrapper around rmspecials().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the stripped text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcRMSpecials( $args ) {
+		$given = count( $args );
+		if ( $given < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'rmspecials', 1, $given ]
+			);
+		}
+
+		$stripped = $this->rmspecials( $args[0]->toString() );
+
+		return new AFPData( AFPData::DSTRING, $stripped );
+	}
+
+	/**
+	 * Filter function wrapper around rmwhitespace().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the stripped text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcRMWhitespace( $args ) {
+		$given = count( $args );
+		if ( $given < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'rmwhitespace', 1, $given ]
+			);
+		}
+
+		$stripped = $this->rmwhitespace( $args[0]->toString() );
+
+		return new AFPData( AFPData::DSTRING, $stripped );
+	}
+
+	/**
+	 * @param array $args
+	 * @return AFPData
+	 * @throws AFPUserVisibleException
+	 */
+	protected function funcRMDoubles( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'rmdoubles', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		$s = $this->rmdoubles( $s );
+
+		return new AFPData( AFPData::DSTRING, $s );
+	}
+
+	/**
+	 * Apply ccnorm(), rmdoubles(), rmspecials() and rmwhitespace(), in that
+	 * order, to the string form of the first argument.
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the normalized text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcNorm( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'norm', 1, count( $args ) ]
+			);
+		}
+		$s = $args[0]->toString();
+
+		$s = $this->ccnorm( $s );
+		$s = $this->rmdoubles( $s );
+		$s = $this->rmspecials( $s );
+		$s = $this->rmwhitespace( $s );
+
+		return new AFPData( AFPData::DSTRING, $s );
+	}
+
+	/**
+	 * Multibyte substring: mb_substr() of $args[0] from offset $args[1], with
+	 * the optional length $args[2].
+	 *
+	 * @param array $args String, offset and optional length
+	 * @return AFPData DSTRING holding the substring
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcSubstr( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'substr', 2, count( $args ) ]
+			);
+		}
+
+		$s = $args[0]->toString();
+		$offset = $args[1]->toInt();
+
+		if ( isset( $args[2] ) ) {
+			$length = $args[2]->toInt();
+
+			$result = mb_substr( $s, $offset, $length );
+		} else {
+			$result = mb_substr( $s, $offset );
+		}
+
+		return new AFPData( AFPData::DSTRING, $result );
+	}
+
+	/**
+	 * Character position of the first occurrence of $args[1] in $args[0],
+	 * optionally starting the search at offset $args[2].
+	 *
+	 * @param array $args Haystack, needle and optional offset
+	 * @return AFPData DINT holding the position, or -1 when the needle is empty,
+	 *   not found, or the offset lies outside the haystack
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments
+	 */
+	protected function funcStrPos( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'strpos', 2, count( $args ) ]
+			);
+		}
+
+		$haystack = $args[0]->toString();
+		$needle = $args[1]->toString();
+
+		// T62203: Keep empty parameters from causing PHP warnings
+		if ( $needle === '' ) {
+			return new AFPData( AFPData::DINT, -1 );
+		}
+
+		if ( isset( $args[2] ) ) {
+			$offset = $args[2]->toInt();
+
+			// mb_strpos() rejects an offset outside the haystack (a warning in
+			// PHP 7, a ValueError in PHP 8). Nothing can match there anyway,
+			// so report "not found" without calling it.
+			if ( abs( $offset ) > mb_strlen( $haystack ) ) {
+				return new AFPData( AFPData::DINT, -1 );
+			}
+
+			$result = mb_strpos( $haystack, $needle, $offset );
+		} else {
+			$result = mb_strpos( $haystack, $needle );
+		}
+
+		if ( $result === false ) {
+			$result = -1;
+		}
+
+		return new AFPData( AFPData::DINT, $result );
+	}
+
+	/**
+	 * Replace every occurrence of $args[1] in $args[0] with $args[2] via str_replace().
+	 *
+	 * @param array $args Subject, search and replacement
+	 * @return AFPData DSTRING holding the result
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than three arguments
+	 */
+	protected function funcStrReplace( $args ) {
+		if ( count( $args ) < 3 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'str_replace', 3, count( $args ) ]
+			);
+		}
+
+		$subject = $args[0]->toString();
+		$search = $args[1]->toString();
+		$replace = $args[2]->toString();
+
+		return new AFPData( AFPData::DSTRING, str_replace( $search, $replace, $subject ) );
+	}
+
+	/**
+	 * Escape regex metacharacters in the first argument with preg_quote().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData DSTRING holding the escaped text
+	 * @throws AFPUserVisibleException 'notenoughargs' when no argument is given
+	 */
+	protected function funcStrRegexEscape( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
+				[ 'rescape', 1, count( $args ) ] );
+		}
+
+		$string = $args[0]->toString();
+
+		// preg_quote does not need the second parameter, since rlike takes
+		// care of the delimiter symbol itself
+		return new AFPData( AFPData::DSTRING, preg_quote( $string ) );
+	}
+
+	/**
+	 * Assign $args[1] to the user variable named by $args[0] and return the value.
+	 *
+	 * @param array $args Variable name and value
+	 * @return mixed The value that was stored ($args[1], unchanged)
+	 * @throws AFPUserVisibleException 'notenoughargs' for fewer than two arguments;
+	 *   also whatever setUserVariable() throws
+	 */
+	protected function funcSetVar( $args ) {
+		if ( count( $args ) < 2 ) {
+			throw new AFPUserVisibleException(
+				'notenoughargs',
+				$this->mCur->pos,
+				[ 'set_var', 2, count( $args ) ]
+			);
+		}
+
+		$varName = $args[0]->toString();
+		$value = $args[1];
+
+		$this->setUserVariable( $varName, $value );
+
+		return $value;
+	}
+
+	/**
+	 * Cast the first argument to DSTRING via AFPData::castTypes().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData
+	 * @throws AFPUserVisibleException 'noparams' when no argument is given
+	 */
+	protected function castString( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+		}
+		$val = $args[0];
+
+		return AFPData::castTypes( $val, AFPData::DSTRING );
+	}
+
+	/**
+	 * @param array $args
+	 * @return AFPData
+	 * @throws AFPUserVisibleException 'noparams' when no argument is given
+	 */
+	protected function castInt( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+		}
+		$val = $args[0];
+
+		// Only the first argument is converted, via AFPData::castTypes() to DINT
+		return AFPData::castTypes( $val, AFPData::DINT );
+	}
+
+	/**
+	 * Cast the first argument to DFLOAT via AFPData::castTypes().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData
+	 * @throws AFPUserVisibleException 'noparams' when no argument is given
+	 */
+	protected function castFloat( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+		}
+		$val = $args[0];
+
+		return AFPData::castTypes( $val, AFPData::DFLOAT );
+	}
+
+	/**
+	 * Cast the first argument to DBOOL via AFPData::castTypes().
+	 *
+	 * @param array $args Function arguments; only $args[0] is read
+	 * @return AFPData
+	 * @throws AFPUserVisibleException 'noparams' when no argument is given
+	 */
+	protected function castBool( $args ) {
+		if ( count( $args ) < 1 ) {
+			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+		}
+		$val = $args[0];
+
+		return AFPData::castTypes( $val, AFPData::DBOOL );
+	}
+}
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php
new file mode 100644
index 00000000..a97fccaf
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterTokenizer.php
@@ -0,0 +1,258 @@
+<?php
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Tokenizer for AbuseFilter rules.
+ */
+class AbuseFilterTokenizer {
+	/** @var int Tokenizer cache version. Increment this when changing the syntax. **/
+	const CACHE_VERSION = 1;
+	const COMMENT_START_RE = '/\s*\/\*/A';
+	const ID_SYMBOL_RE = '/[0-9A-Za-z_]+/A';
+	const OPERATOR_RE =
+		'/(\!\=\=|\!\=|\!|\*\*|\*|\/|\+|\-|%|&|\||\^|\:\=|\?|\:|\<\=|\<|\>\=|\>|\=\=\=|\=\=|\=)/A';
+	const RADIX_RE = '/([0-9A-Fa-f]+(?:\.\d*)?|\.\d+)([bxo])?/Au';
+	const WHITESPACE = "\011\012\013\014\015\040";
+
+	// Order is important. The operator-matching regex (OPERATOR_RE) requires that
+	// ** comes before *, etc. They are sorted to make it easy to spot
+	// such errors.
+	public static $operators = [
+		'!==', '!=', '!', // Inequality
+		'**', '*', // Multiplication/exponentiation
+		'/', '+', '-', '%', // Other arithmetic
+		'&', '|', '^', // Logic
+		':=', // Setting
+		'?', ':', // Ternary
+		'<=', '<', // Less than
+		'>=', '>', // Greater than
+		'===', '==', '=', // Equality
+	];
+
+	// Single-character punctuation mapped to the token type it produces
+	public static $punctuation = [
+		',' => AFPToken::TCOMMA,
+		'(' => AFPToken::TBRACE,
+		')' => AFPToken::TBRACE,
+		'[' => AFPToken::TSQUAREBRACKET,
+		']' => AFPToken::TSQUAREBRACKET,
+		';' => AFPToken::TSTATEMENTSEPARATOR,
+	];
+
+	// Number-literal suffix character mapped to its numeric base
+	public static $bases = [
+		'b' => 2,
+		'x' => 16,
+		'o' => 8
+	];
+
+	// Per-base validation regex for the digits of a number literal.
+	// Octal digits are 0-7: the previous [0-8] class let "8" through, and
+	// base_convert() silently ignores digits that are invalid for the base.
+	public static $baseCharsRe = [
+		2 => '/^[01]+$/',
+		8 => '/^[0-7]+$/',
+		16 => '/^[0-9A-Fa-f]+$/',
+		10 => '/^[0-9.]+$/',
+	];
+
+	public static $keywords = [
+		'in', 'like', 'true', 'false', 'null', 'contains', 'matches',
+		'rlike', 'irlike', 'regex', 'if', 'then', 'else', 'end',
+	];
+
+	/**
+	 * Split filter code into tokens, caching the result for 24 hours.
+	 *
+	 * Tokens are read with nextToken() until a call no longer advances the
+	 * position. Each entry is keyed by the token's start position and holds
+	 * the token together with the position right after it.
+	 *
+	 * @param string $code
+	 * @return array Map of token start position => [ AFPToken, next position ]
+	 * @throws AFPException
+	 * @throws AFPUserVisibleException
+	 */
+	public static function tokenize( $code ) {
+		static $tokenizerCache = null;
+
+		if ( !$tokenizerCache ) {
+			$tokenizerCache = ObjectCache::getLocalServerInstance( 'hash' );
+		}
+
+		static $stats = null;
+
+		if ( !$stats ) {
+			$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
+		}
+
+		// NOTE(review): crc32() is only a 32-bit checksum, so two different rule
+		// texts can map to the same cache key; a wider hash would rule that out.
+		$cacheKey = wfGlobalCacheKey( __CLASS__, self::CACHE_VERSION, crc32( $code ) );
+
+		$tokens = $tokenizerCache->get( $cacheKey );
+
+		if ( $tokens ) {
+			$stats->increment( 'AbuseFilter.tokenizerCache.hit' );
+			return $tokens;
+		}
+
+		$stats->increment( 'AbuseFilter.tokenizerCache.miss' );
+		$tokens = [];
+		$curPos = 0;
+
+		// nextToken() leaves $curPos untouched once the input is exhausted,
+		// which is what ends this loop.
+		do {
+			$prevPos = $curPos;
+			$token = self::nextToken( $code, $curPos );
+			$tokens[ $token->pos ] = [ $token, $curPos ];
+		} while ( $curPos !== $prevPos );
+
+		$tokenizerCache->set( $cacheKey, $tokens, 60 * 60 * 24 );
+
+		return $tokens;
+	}
+
+	/**
+	 * @param string $code
+	 * @param int &$offset
+	 * @return AFPToken
+	 * @throws AFPException
+	 * @throws
AFPUserVisibleException On an unclosed comment or an unrecognised token
+	 */
+	protected static function nextToken( $code, &$offset ) {
+		$matches = [];
+		// Recorded before comments and whitespace are skipped; every token
+		// built below is given this position.
+		$start = $offset;
+
+		// Read past comments
+		while ( preg_match( self::COMMENT_START_RE, $code, $matches, 0, $offset ) ) {
+			if ( strpos( $code, '*/', $offset ) === false ) {
+				throw new AFPUserVisibleException(
+					'unclosedcomment', $offset, [] );
+			}
+			// Continue right after the closing "*/"
+			$offset = strpos( $code, '*/', $offset ) + 2;
+		}
+
+		// Spaces
+		$offset += strspn( $code, self::WHITESPACE, $offset );
+		if ( $offset >= strlen( $code ) ) {
+			// End of input
+			return new AFPToken( AFPToken::TNONE, '', $start );
+		}
+
+		$chr = $code[$offset];
+
+		// Punctuation
+		if ( isset( self::$punctuation[$chr] ) ) {
+			$offset++;
+			return new AFPToken( self::$punctuation[$chr], $chr, $start );
+		}
+
+		// String literal
+		if ( $chr === '"' || $chr === "'" ) {
+			return self::readStringLiteral( $code, $offset, $start );
+		}
+
+		$matches = [];
+
+		// Operators
+		if ( preg_match( self::OPERATOR_RE, $code, $matches, 0, $offset ) ) {
+			$token = $matches[0];
+			$offset += strlen( $token );
+			return new AFPToken( AFPToken::TOP, $token, $start );
+		}
+
+		// Numbers
+		if ( preg_match( self::RADIX_RE, $code, $matches, 0, $offset ) ) {
+			$token = $matches[0];
+			$input = $matches[1];
+			$baseChar = isset( $matches[2] ) ? $matches[2] : null;
+			// Sometimes the base char gets mixed in with the rest of it because
+			// the regex targets hex, too.
+			// This mostly happens with binary
+			if ( !$baseChar && !empty( self::$bases[ substr( $input, - 1 ) ] ) ) {
+				$baseChar = substr( $input, - 1, 1 );
+				$input = substr( $input, 0, - 1 );
+			}
+
+			// No suffix means decimal
+			$base = $baseChar ? self::$bases[$baseChar] : 10;
+
+			// Check against the appropriate character class for input validation
+
+			// When validation fails, control falls through to the ID/keyword branch
+			if ( preg_match( self::$baseCharsRe[$base], $input ) ) {
+				$num = $base !== 10 ? base_convert( $input, $base, 10 ) : $input;
+				$offset += strlen( $token );
+				return ( strpos( $input, '.' ) !== false )
+					?
new AFPToken( AFPToken::TFLOAT, floatval( $num ), $start )
+					: new AFPToken( AFPToken::TINT, intval( $num ), $start );
+			}
+		}
+
+		// IDs / Keywords
+
+		if ( preg_match( self::ID_SYMBOL_RE, $code, $matches, 0, $offset ) ) {
+			$token = $matches[0];
+			$offset += strlen( $token );
+			$type = in_array( $token, self::$keywords )
+				? AFPToken::TKEYWORD
+				: AFPToken::TID;
+			return new AFPToken( $type, $token, $start );
+		}
+
+		throw new AFPUserVisibleException(
+			'unrecognisedtoken', $start, [ substr( $code, $start ) ] );
+	}
+
+	/**
+	 * Read a quoted string literal whose opening quote is at $offset.
+	 *
+	 * Recognised escapes: \\, the quote character itself, \n, \r, \t and \xXX
+	 * (two hex digits). "\x" without two hex digits yields a plain "x"; any
+	 * other escape is kept verbatim, backslash included.
+	 *
+	 * @param string $code
+	 * @param int &$offset Position of the opening quote; moved past the closing quote
+	 * @param int $start Position recorded on the returned token
+	 * @return AFPToken TSTRING token holding the unescaped text
+	 * @throws AFPException
+	 * @throws AFPUserVisibleException 'unclosedstring' when the input ends before the closing quote
+	 */
+	protected static function readStringLiteral( $code, &$offset, $start ) {
+		$type = $code[$offset];
+		$offset++;
+		$length = strlen( $code );
+		$token = '';
+		while ( $offset < $length ) {
+			if ( $code[$offset] === $type ) {
+				$offset++;
+				return new AFPToken( AFPToken::TSTRING, $token, $start );
+			}
+
+			// Performance: Use a PHP function (implemented in C)
+			// to scan ahead.
+			$addLength = strcspn( $code, $type . "\\", $offset );
+			if ( $addLength ) {
+				$token .= substr( $code, $offset, $addLength );
+				$offset += $addLength;
+			} elseif ( $code[$offset] == '\\' ) {
+				// A backslash may be the very last character of the input; reading
+				// $code[$offset + 1] there is an uninitialized string offset.
+				// Use '' instead: it matches no case below, so the backslash is
+				// kept and the loop ends in the 'unclosedstring' error as before.
+				$escaped = ( $offset + 1 < $length ) ? $code[$offset + 1] : '';
+				switch ( $escaped ) {
+					case '\\':
+						$token .= '\\';
+						break;
+					case $type:
+						$token .= $type;
+						break;
+					case 'n':
+						$token .= "\n";
+						break;
+					case 'r':
+						$token .= "\r";
+						break;
+					case 't':
+						$token .= "\t";
+						break;
+					case 'x':
+						$chr = substr( $code, $offset + 2, 2 );
+
+						if ( preg_match( '/^[0-9A-Fa-f]{2}$/', $chr ) ) {
+							$token .= chr( hexdec( $chr ) );
+							$offset += 2; # \xXX -- 2 done later
+						} else {
+							$token .= 'x';
+						}
+						break;
+					default:
+						$token .= "\\" . $escaped;
+				}
+
+				$offset += 2;
+
+			} else {
+				$token .= $code[$offset];
+				$offset++;
+			}
+		}
+		throw new AFPUserVisibleException( 'unclosedstring', $offset, [] );
+	}
+} |