summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php')
-rw-r--r--www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php1560
1 files changed, 1560 insertions, 0 deletions
diff --git a/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php
new file mode 100644
index 00000000..50f8dddc
--- /dev/null
+++ b/www/wiki/extensions/AbuseFilter/includes/parser/AbuseFilterParser.php
@@ -0,0 +1,1560 @@
+<?php
+
+use Wikimedia\Equivset\Equivset;
+
+class AbuseFilterParser {
+ public $mCode, $mTokens, $mPos, $mCur, $mShortCircuit, $mAllowShort, $mLen;
+
+ /**
+ * @var AbuseFilterVariableHolder
+ */
+ public $mVars;
+
+ // length,lcase,ucase,ccnorm,rmdoubles,specialratio,rmspecials,norm,count,get_matches
+ public static $mFunctions = [
+ 'lcase' => 'funcLc',
+ 'ucase' => 'funcUc',
+ 'length' => 'funcLen',
+ 'string' => 'castString',
+ 'int' => 'castInt',
+ 'float' => 'castFloat',
+ 'bool' => 'castBool',
+ 'norm' => 'funcNorm',
+ 'ccnorm' => 'funcCCNorm',
+ 'ccnorm_contains_any' => 'funcCCNormContainsAny',
+ 'ccnorm_contains_all' => 'funcCCNormContainsAll',
+ 'specialratio' => 'funcSpecialRatio',
+ 'rmspecials' => 'funcRMSpecials',
+ 'rmdoubles' => 'funcRMDoubles',
+ 'rmwhitespace' => 'funcRMWhitespace',
+ 'count' => 'funcCount',
+ 'rcount' => 'funcRCount',
+ 'get_matches' => 'funcGetMatches',
+ 'ip_in_range' => 'funcIPInRange',
+ 'contains_any' => 'funcContainsAny',
+ 'contains_all' => 'funcContainsAll',
+ 'substr' => 'funcSubstr',
+ 'strlen' => 'funcLen',
+ 'strpos' => 'funcStrPos',
+ 'str_replace' => 'funcStrReplace',
+ 'rescape' => 'funcStrRegexEscape',
+ 'set' => 'funcSetVar',
+ 'set_var' => 'funcSetVar',
+ ];
+
+ // Functions that affect parser state, and shouldn't be cached.
+ public static $ActiveFunctions = [
+ 'funcSetVar',
+ ];
+
+ public static $mKeywords = [
+ 'in' => 'keywordIn',
+ 'like' => 'keywordLike',
+ 'matches' => 'keywordLike',
+ 'contains' => 'keywordContains',
+ 'rlike' => 'keywordRegex',
+ 'irlike' => 'keywordRegexInsensitive',
+ 'regex' => 'keywordRegex',
+ ];
+
+ public static $funcCache = [];
+
+ /**
+ * @var Equivset
+ */
+ protected static $equivset;
+
+ /**
+ * Create a new instance
+ *
+ * @param AbuseFilterVariableHolder $vars
+ */
+ public function __construct( $vars = null ) {
+ $this->resetState();
+ if ( $vars instanceof AbuseFilterVariableHolder ) {
+ $this->mVars = $vars;
+ }
+ }
+
+ public function resetState() {
+ $this->mCode = '';
+ $this->mTokens = [];
+ $this->mVars = new AbuseFilterVariableHolder;
+ $this->mPos = 0;
+ $this->mShortCircuit = false;
+ $this->mAllowShort = true;
+ }
+
+ /**
+ * @param string $filter
+ * @return array|bool
+ */
+ public function checkSyntax( $filter ) {
+ try {
+ $origAS = $this->mAllowShort;
+ $this->mAllowShort = false;
+ $this->parse( $filter );
+ } catch ( AFPUserVisibleException $excep ) {
+ $this->mAllowShort = $origAS;
+
+ return [ $excep->getMessageObj()->text(), $excep->mPosition ];
+ }
+ $this->mAllowShort = $origAS;
+
+ return true;
+ }
+
+ /**
+ * @param string $name
+ * @param mixed $value
+ */
+ public function setVar( $name, $value ) {
+ $this->mVars->setVar( $name, $value );
+ }
+
+ /**
+ * @param mixed $vars
+ */
+ public function setVars( $vars ) {
+ if ( is_array( $vars ) ) {
+ foreach ( $vars as $name => $var ) {
+ $this->setVar( $name, $var );
+ }
+ } elseif ( $vars instanceof AbuseFilterVariableHolder ) {
+ $this->mVars->addHolders( $vars );
+ }
+ }
+
+ /**
+ * @return AFPToken
+ */
+ protected function move() {
+ list( $this->mCur, $this->mPos ) = $this->mTokens[$this->mPos];
+ }
+
+ /**
+ * getState() function allows parser state to be rollbacked to several tokens back
+ * @return AFPParserState
+ */
+ protected function getState() {
+ return new AFPParserState( $this->mCur, $this->mPos );
+ }
+
+ /**
+ * setState() function allows parser state to be rollbacked to several tokens back
+ * @param AFPParserState $state
+ */
+ protected function setState( AFPParserState $state ) {
+ $this->mCur = $state->token;
+ $this->mPos = $state->pos;
+ }
+
+ /**
+ * @return mixed
+ * @throws AFPUserVisibleException
+ */
+ protected function skipOverBraces() {
+ if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) ||
+ !$this->mShortCircuit
+ ) {
+ return;
+ }
+
+ $braces = 1;
+ while ( $this->mCur->type != AFPToken::TNONE && $braces > 0 ) {
+ $this->move();
+ if ( $this->mCur->type == AFPToken::TBRACE ) {
+ if ( $this->mCur->value == '(' ) {
+ $braces++;
+ } elseif ( $this->mCur->value == ')' ) {
+ $braces--;
+ }
+ }
+ }
+ if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, [ ')' ] );
+ }
+ }
+
+ /**
+ * @param string $code
+ * @return bool
+ */
+ public function parse( $code ) {
+ return $this->intEval( $code )->toBool();
+ }
+
+ /**
+ * @param string $filter
+ * @return string
+ */
+ public function evaluateExpression( $filter ) {
+ return $this->intEval( $filter )->toString();
+ }
+
+ /**
+ * @param string $code
+ * @return AFPData
+ */
+ public function intEval( $code ) {
+ // Setup, resetting
+ $this->mCode = $code;
+ $this->mTokens = AbuseFilterTokenizer::tokenize( $code );
+ $this->mPos = 0;
+ $this->mLen = strlen( $code );
+ $this->mShortCircuit = false;
+
+ $result = new AFPData();
+ $this->doLevelEntry( $result );
+
+ return $result;
+ }
+
+ /**
+ * @param string $a
+ * @param string $b
+ * @return int
+ */
+ static function lengthCompare( $a, $b ) {
+ if ( strlen( $a ) == strlen( $b ) ) {
+ return 0;
+ }
+
+ return ( strlen( $a ) < strlen( $b ) ) ? -1 : 1;
+ }
+
+ /* Levels */
+
+ /**
+ * Handles unexpected characters after the expression
+ *
+ * @param AFPData &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelEntry( &$result ) {
+ $this->doLevelSemicolon( $result );
+
+ if ( $this->mCur->type != AFPToken::TNONE ) {
+ throw new AFPUserVisibleException(
+ 'unexpectedatend',
+ $this->mCur->pos, [ $this->mCur->type ]
+ );
+ }
+ }
+
+ /**
+ * Handles multiple expressions
+ * @param AFPData &$result
+ */
+ protected function doLevelSemicolon( &$result ) {
+ do {
+ $this->move();
+ if ( $this->mCur->type != AFPToken::TSTATEMENTSEPARATOR ) {
+ $this->doLevelSet( $result );
+ }
+ } while ( $this->mCur->type == AFPToken::TSTATEMENTSEPARATOR );
+ }
+
+ /**
+ * Handles multiple expressions
+ *
+ * @param AFPData &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelSet( &$result ) {
+ if ( $this->mCur->type == AFPToken::TID ) {
+ $varname = $this->mCur->value;
+ $prev = $this->getState();
+ $this->move();
+
+ if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ $this->move();
+ $this->doLevelSet( $result );
+ $this->setUserVariable( $varname, $result );
+
+ return;
+ } elseif ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ if ( !$this->mVars->varIsSet( $varname ) ) {
+ throw new AFPUserVisibleException( 'unrecognisedvar',
+ $this->mCur->pos,
+ [ $varname ]
+ );
+ }
+ $list = $this->mVars->getVar( $varname );
+ if ( $list->type != AFPData::DLIST ) {
+ throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] );
+ }
+ $list = $list->toList();
+ $this->move();
+ if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ $idx = 'new';
+ } else {
+ $this->setState( $prev );
+ $this->move();
+ $idx = new AFPData();
+ $this->doLevelSemicolon( $idx );
+ $idx = $idx->toInt();
+ if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
+ [ ']', $this->mCur->type, $this->mCur->value ] );
+ }
+ if ( count( $list ) <= $idx ) {
+ throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+ [ $idx, count( $result->data ) ] );
+ }
+ }
+ $this->move();
+ if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':=' ) {
+ $this->move();
+ $this->doLevelSet( $result );
+ if ( $idx === 'new' ) {
+ $list[] = $result;
+ } else {
+ $list[$idx] = $result;
+ }
+ $this->setUserVariable( $varname, new AFPData( AFPData::DLIST, $list ) );
+
+ return;
+ } else {
+ $this->setState( $prev );
+ }
+ } else {
+ $this->setState( $prev );
+ }
+ }
+ $this->doLevelConditions( $result );
+ }
+
+ /**
+ * @param AFPData &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelConditions( &$result ) {
+ if ( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'if' ) {
+ $this->move();
+ $this->doLevelBoolOps( $result );
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'then' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'then',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ $r1 = new AFPData();
+ $r2 = new AFPData();
+
+ $isTrue = $result->toBool();
+
+ if ( !$isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r1 );
+ if ( !$isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'else' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'else',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ if ( $isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r2 );
+ if ( $isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TKEYWORD && $this->mCur->value == 'end' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ 'end',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ if ( $result->toBool() ) {
+ $result = $r1;
+ } else {
+ $result = $r2;
+ }
+ } else {
+ $this->doLevelBoolOps( $result );
+ if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '?' ) {
+ $this->move();
+ $r1 = new AFPData();
+ $r2 = new AFPData();
+
+ $isTrue = $result->toBool();
+
+ if ( !$isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r1 );
+ if ( !$isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( !( $this->mCur->type == AFPToken::TOP && $this->mCur->value == ':' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ ':',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ if ( $isTrue ) {
+ $scOrig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ }
+ $this->doLevelConditions( $r2 );
+ if ( $isTrue ) {
+ $this->mShortCircuit = $scOrig;
+ }
+
+ if ( $isTrue ) {
+ $result = $r1;
+ } else {
+ $result = $r2;
+ }
+ }
+ }
+ }
+
+ /**
+ * @param AFPData &$result
+ */
+ protected function doLevelBoolOps( &$result ) {
+ $this->doLevelCompares( $result );
+ $ops = [ '&', '|', '^' ];
+ while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
+ $op = $this->mCur->value;
+ $this->move();
+ $r2 = new AFPData();
+
+ // We can go on quickly as either one statement with | is true or on with & is false
+ if ( ( $op == '&' && !$result->toBool() ) || ( $op == '|' && $result->toBool() ) ) {
+ $orig = $this->mShortCircuit;
+ $this->mShortCircuit = $this->mAllowShort;
+ $this->doLevelCompares( $r2 );
+ $this->mShortCircuit = $orig;
+ $result = new AFPData( AFPData::DBOOL, $result->toBool() );
+ continue;
+ }
+
+ $this->doLevelCompares( $r2 );
+
+ $result = AFPData::boolOp( $result, $r2, $op );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelCompares( &$result ) {
+ $this->doLevelSumRels( $result );
+ $ops = [ '==', '===', '!=', '!==', '<', '>', '<=', '>=', '=' ];
+ while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
+ $op = $this->mCur->value;
+ $this->move();
+ $r2 = new AFPData();
+ $this->doLevelSumRels( $r2 );
+ if ( $this->mShortCircuit ) {
+ break; // The result doesn't matter.
+ }
+ AbuseFilter::triggerLimiter();
+ $result = AFPData::compareOp( $result, $r2, $op );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelSumRels( &$result ) {
+ $this->doLevelMulRels( $result );
+ $ops = [ '+', '-' ];
+ while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
+ $op = $this->mCur->value;
+ $this->move();
+ $r2 = new AFPData();
+ $this->doLevelMulRels( $r2 );
+ if ( $this->mShortCircuit ) {
+ break; // The result doesn't matter.
+ }
+ if ( $op == '+' ) {
+ $result = AFPData::sum( $result, $r2 );
+ }
+ if ( $op == '-' ) {
+ $result = AFPData::sub( $result, $r2 );
+ }
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelMulRels( &$result ) {
+ $this->doLevelPow( $result );
+ $ops = [ '*', '/', '%' ];
+ while ( $this->mCur->type == AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
+ $op = $this->mCur->value;
+ $this->move();
+ $r2 = new AFPData();
+ $this->doLevelPow( $r2 );
+ if ( $this->mShortCircuit ) {
+ break; // The result doesn't matter.
+ }
+ $result = AFPData::mulRel( $result, $r2, $op, $this->mCur->pos );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelPow( &$result ) {
+ $this->doLevelBoolInvert( $result );
+ while ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '**' ) {
+ $this->move();
+ $expanent = new AFPData();
+ $this->doLevelBoolInvert( $expanent );
+ if ( $this->mShortCircuit ) {
+ break; // The result doesn't matter.
+ }
+ $result = AFPData::pow( $result, $expanent );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelBoolInvert( &$result ) {
+ if ( $this->mCur->type == AFPToken::TOP && $this->mCur->value == '!' ) {
+ $this->move();
+ $this->doLevelSpecialWords( $result );
+ if ( $this->mShortCircuit ) {
+ return; // The result doesn't matter.
+ }
+ $result = AFPData::boolInvert( $result );
+ } else {
+ $this->doLevelSpecialWords( $result );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelSpecialWords( &$result ) {
+ $this->doLevelUnarys( $result );
+ $keyword = strtolower( $this->mCur->value );
+ if ( $this->mCur->type == AFPToken::TKEYWORD
+ && in_array( $keyword, array_keys( self::$mKeywords ) )
+ ) {
+ $func = self::$mKeywords[$keyword];
+ $this->move();
+ $r2 = new AFPData();
+ $this->doLevelUnarys( $r2 );
+
+ if ( $this->mShortCircuit ) {
+ return; // The result doesn't matter.
+ }
+
+ AbuseFilter::triggerLimiter();
+
+ $result = AFPData::$func( $result, $r2, $this->mCur->pos );
+ }
+ }
+
+ /**
+ * @param string &$result
+ */
+ protected function doLevelUnarys( &$result ) {
+ $op = $this->mCur->value;
+ if ( $this->mCur->type == AFPToken::TOP && ( $op == "+" || $op == "-" ) ) {
+ $this->move();
+ $this->doLevelListElements( $result );
+ if ( $this->mShortCircuit ) {
+ return; // The result doesn't matter.
+ }
+ if ( $op == '-' ) {
+ $result = AFPData::unaryMinus( $result );
+ }
+ } else {
+ $this->doLevelListElements( $result );
+ }
+ }
+
+ /**
+ * @param string &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelListElements( &$result ) {
+ $this->doLevelBraces( $result );
+ while ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == '[' ) {
+ $idx = new AFPData();
+ $this->doLevelSemicolon( $idx );
+ if ( !( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) ) {
+ throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
+ [ ']', $this->mCur->type, $this->mCur->value ] );
+ }
+ $idx = $idx->toInt();
+ if ( $result->type == AFPData::DLIST ) {
+ if ( count( $result->data ) <= $idx ) {
+ throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+ [ $idx, count( $result->data ) ] );
+ }
+ $result = $result->data[$idx];
+ } else {
+ throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, [] );
+ }
+ $this->move();
+ }
+ }
+
+ /**
+ * @param string &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelBraces( &$result ) {
+ if ( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == '(' ) {
+ if ( $this->mShortCircuit ) {
+ $this->skipOverBraces();
+ } else {
+ $this->doLevelSemicolon( $result );
+ }
+ if ( !( $this->mCur->type == AFPToken::TBRACE && $this->mCur->value == ')' ) ) {
+ throw new AFPUserVisibleException(
+ 'expectednotfound',
+ $this->mCur->pos,
+ [ ')', $this->mCur->type, $this->mCur->value ]
+ );
+ }
+ $this->move();
+ } else {
+ $this->doLevelFunction( $result );
+ }
+ }
+
+ /**
+ * @param string &$result
+ * @throws AFPUserVisibleException
+ */
+ protected function doLevelFunction( &$result ) {
+ if ( $this->mCur->type == AFPToken::TID && isset( self::$mFunctions[$this->mCur->value] ) ) {
+ $func = self::$mFunctions[$this->mCur->value];
+ $this->move();
+ if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != '(' ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ '(',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+
+ if ( $this->mShortCircuit ) {
+ $this->skipOverBraces();
+ $this->move();
+
+ return; // The result doesn't matter.
+ }
+
+ $args = [];
+ do {
+ $r = new AFPData();
+ $this->doLevelSemicolon( $r );
+ $args[] = $r;
+ } while ( $this->mCur->type == AFPToken::TCOMMA );
+
+ if ( $this->mCur->type != AFPToken::TBRACE || $this->mCur->value != ')' ) {
+ throw new AFPUserVisibleException( 'expectednotfound',
+ $this->mCur->pos,
+ [
+ ')',
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+
+ $funcHash = md5( $func . serialize( $args ) );
+
+ if ( isset( self::$funcCache[$funcHash] ) &&
+ !in_array( $func, self::$ActiveFunctions )
+ ) {
+ $result = self::$funcCache[$funcHash];
+ } else {
+ AbuseFilter::triggerLimiter();
+ $result = self::$funcCache[$funcHash] = $this->$func( $args );
+ }
+
+ if ( count( self::$funcCache ) > 1000 ) {
+ self::$funcCache = [];
+ }
+ } else {
+ $this->doLevelAtom( $result );
+ }
+ }
+
+ /**
+ * @param string &$result
+ * @throws AFPUserVisibleException
+ * @return AFPData
+ */
+ protected function doLevelAtom( &$result ) {
+ $tok = $this->mCur->value;
+ switch ( $this->mCur->type ) {
+ case AFPToken::TID:
+ if ( $this->mShortCircuit ) {
+ break;
+ }
+ $var = strtolower( $tok );
+ $result = $this->getVarValue( $var );
+ break;
+ case AFPToken::TSTRING:
+ $result = new AFPData( AFPData::DSTRING, $tok );
+ break;
+ case AFPToken::TFLOAT:
+ $result = new AFPData( AFPData::DFLOAT, $tok );
+ break;
+ case AFPToken::TINT:
+ $result = new AFPData( AFPData::DINT, $tok );
+ break;
+ case AFPToken::TKEYWORD:
+ if ( $tok == "true" ) {
+ $result = new AFPData( AFPData::DBOOL, true );
+ } elseif ( $tok == "false" ) {
+ $result = new AFPData( AFPData::DBOOL, false );
+ } elseif ( $tok == "null" ) {
+ $result = new AFPData();
+ } else {
+ throw new AFPUserVisibleException(
+ 'unrecognisedkeyword',
+ $this->mCur->pos,
+ [ $tok ]
+ );
+ }
+ break;
+ case AFPToken::TNONE:
+ return; // Handled at entry level
+ case AFPToken::TBRACE:
+ if ( $this->mCur->value == ')' ) {
+ return; // Handled at the entry level
+ }
+ case AFPToken::TSQUAREBRACKET:
+ if ( $this->mCur->value == '[' ) {
+ $list = [];
+ while ( true ) {
+ $this->move();
+ if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ break;
+ }
+ $item = new AFPData();
+ $this->doLevelSet( $item );
+ $list[] = $item;
+ if ( $this->mCur->type == AFPToken::TSQUAREBRACKET && $this->mCur->value == ']' ) {
+ break;
+ }
+ if ( $this->mCur->type != AFPToken::TCOMMA ) {
+ throw new AFPUserVisibleException(
+ 'expectednotfound',
+ $this->mCur->pos,
+ [ ', or ]', $this->mCur->type, $this->mCur->value ]
+ );
+ }
+ }
+ $result = new AFPData( AFPData::DLIST, $list );
+ break;
+ }
+ default:
+ throw new AFPUserVisibleException(
+ 'unexpectedtoken',
+ $this->mCur->pos,
+ [
+ $this->mCur->type,
+ $this->mCur->value
+ ]
+ );
+ }
+ $this->move();
+ }
+
+ /* End of levels */
+
+ /**
+ * @param string $var
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function getVarValue( $var ) {
+ $var = strtolower( $var );
+ $builderValues = AbuseFilter::getBuilderValues();
+ if ( !( array_key_exists( $var, $builderValues['vars'] )
+ || $this->mVars->varIsSet( $var ) )
+ ) {
+ // If the variable is invalid, throw an exception
+ throw new AFPUserVisibleException(
+ 'unrecognisedvar',
+ $this->mCur->pos,
+ [ $var ]
+ );
+ } else {
+ return $this->mVars->getVar( $var );
+ }
+ }
+
+ /**
+ * @param string $name
+ * @param string $value
+ * @throws AFPUserVisibleException
+ */
+ protected function setUserVariable( $name, $value ) {
+ $builderValues = AbuseFilter::getBuilderValues();
+ if ( array_key_exists( $name, $builderValues['vars'] ) ) {
+ throw new AFPUserVisibleException( 'overridebuiltin', $this->mCur->pos, [ $name ] );
+ }
+ $this->mVars->setVar( $name, $value );
+ }
+
+ // Built-in functions
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcLc( $args ) {
+ global $wgContLang;
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'lc', 2, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ return new AFPData( AFPData::DSTRING, $wgContLang->lc( $s ) );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcUc( $args ) {
+ global $wgContLang;
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'uc', 2, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ return new AFPData( AFPData::DSTRING, $wgContLang->uc( $s ) );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcLen( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'len', 2, count( $args ) ]
+ );
+ }
+ if ( $args[0]->type == AFPData::DLIST ) {
+ // Don't use toString on lists, but count
+ return new AFPData( AFPData::DINT, count( $args[0]->data ) );
+ }
+ $s = $args[0]->toString();
+
+ return new AFPData( AFPData::DINT, mb_strlen( $s, 'utf-8' ) );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcSimpleNorm( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'simplenorm', 2, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = preg_replace( '/[\d\W]+/', '', $s );
+ $s = strtolower( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcSpecialRatio( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'specialratio', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ if ( !strlen( $s ) ) {
+ return new AFPData( AFPData::DFLOAT, 0 );
+ }
+
+ $nospecials = $this->rmspecials( $s );
+
+ $val = 1. - ( ( mb_strlen( $nospecials ) / mb_strlen( $s ) ) );
+
+ return new AFPData( AFPData::DFLOAT, $val );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcCount( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'count', 1, count( $args ) ]
+ );
+ }
+
+ if ( $args[0]->type == AFPData::DLIST && count( $args ) == 1 ) {
+ return new AFPData( AFPData::DINT, count( $args[0]->data ) );
+ }
+
+ if ( count( $args ) == 1 ) {
+ $count = count( explode( ',', $args[0]->toString() ) );
+ } else {
+ $needle = $args[0]->toString();
+ $haystack = $args[1]->toString();
+
+ // T62203: Keep empty parameters from causing PHP warnings
+ if ( $needle === '' ) {
+ $count = 0;
+ } else {
+ $count = substr_count( $haystack, $needle );
+ }
+ }
+
+ return new AFPData( AFPData::DINT, $count );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ * @throws Exception
+ */
+ protected function funcRCount( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'rcount', 1, count( $args ) ]
+ );
+ }
+
+ if ( count( $args ) == 1 ) {
+ $count = count( explode( ',', $args[0]->toString() ) );
+ } else {
+ $needle = $args[0]->toString();
+ $haystack = $args[1]->toString();
+
+ # Munge the regex
+ $needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+ $needle = "/$needle/u";
+
+ // Omit the '$matches' argument to avoid computing them, just count.
+ $count = preg_match_all( $needle, $haystack );
+
+ if ( $count === false ) {
+ throw new AFPUserVisibleException(
+ 'regexfailure',
+ $this->mCur->pos,
+ [ 'unspecified error in preg_match_all()', $needle ]
+ );
+ }
+ }
+
+ return new AFPData( AFPData::DINT, $count );
+ }
+
+ /**
+ * Returns an array of matches of needle in the haystack, the first one for the whole regex,
+ * the other ones for every capturing group.
+ *
+ * @param array $args
+ * @return AFPData A list of matches.
+ * @throws AFPUserVisibleException
+ */
+ protected function funcGetMatches( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'get_matches', 2, count( $args ) ]
+ );
+ }
+ $needle = $args[0]->toString();
+ $haystack = $args[1]->toString();
+
+ // Count the amount of capturing groups in the submitted pattern.
+ // This way we can return a fixed-dimension array, much easier to manage.
+ // First, strip away escaped parentheses
+ $sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle );
+ // Then strip starting parentheses of non-capturing groups
+ // (also atomics, lookahead and so on, even if not every of them is supported)
+ $sanitized = preg_replace( '/\(\?/', '', $sanitized );
+ // Finally create an array of falses with dimension = # of capturing groups
+ $groupscount = substr_count( $sanitized, '(' ) + 1;
+ $falsy = array_fill( 0, $groupscount, false );
+
+ // Munge the regex by escaping slashes
+ $needle = preg_replace( '!(\\\\\\\\)*(\\\\)?/!', '$1\/', $needle );
+ $needle = "/$needle/u";
+
+ // Suppress and restore are here for the same reason as T177744
+ Wikimedia\suppressWarnings();
+ $check = preg_match( $needle, $haystack, $matches );
+ Wikimedia\restoreWarnings();
+
+ if ( $check === false ) {
+ throw new AFPUserVisibleException(
+ 'regexfailure',
+ $this->mCur->pos,
+ [ 'unspecified error in preg_match()', $needle ]
+ );
+ }
+
+ // Returned array has non-empty positions identical to the ones returned
+ // by the third parameter of a standard preg_match call ($matches in this case).
+ // We want an union with falsy to return a fixed-dimention array.
+ return AFPData::newFromPHPVar( $matches + $falsy );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcIPInRange( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'ip_in_range', 2, count( $args ) ]
+ );
+ }
+
+ $ip = $args[0]->toString();
+ $range = $args[1]->toString();
+
+ $result = IP::isInRange( $ip, $range );
+
+ return new AFPData( AFPData::DBOOL, $result );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcCCNorm( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'ccnorm', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = html_entity_decode( $s, ENT_QUOTES, 'UTF-8' );
+ $s = $this->ccnorm( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcContainsAny( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'contains_any', 2, count( $args ) ]
+ );
+ }
+
+ $s = array_shift( $args );
+
+ return new AFPData( AFPData::DBOOL, self::contains( $s, $args, true ) );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcContainsAll( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'contains_all', 2, count( $args ) ]
+ );
+ }
+
+ $s = array_shift( $args );
+
+ return new AFPData( AFPData::DBOOL, self::contains( $s, $args, false, false ) );
+ }
+
+ /**
+ * Normalize and search a string for multiple substrings in OR mode
+ *
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcCCNormContainsAny( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'ccnorm_contains_any', 2, count( $args ) ]
+ );
+ }
+
+ $s = array_shift( $args );
+
+ return new AFPData( AFPData::DBOOL, self::contains( $s, $args, true, true ) );
+ }
+
+ /**
+ * Normalize and search a string for multiple substrings in AND mode
+ *
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcCCNormContainsAll( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'ccnorm_contains_all', 2, count( $args ) ]
+ );
+ }
+
+ $s = array_shift( $args );
+
+ return new AFPData( AFPData::DBOOL, self::contains( $s, $args, false, true ) );
+ }
+
+ /**
+ * Search for substrings in a string
+ *
+ * Use is_any to determine wether to use logic OR (true) or AND (false).
+ *
+ * Use normalize = true to make use of ccnorm and
+ * normalize both sides of the search.
+ *
+ * @param AFPData $string
+ * @param AFPData[] $values
+ * @param bool $is_any
+ * @param bool $normalize
+ *
+ * @return bool
+ */
+ protected static function contains( $string, $values, $is_any = true, $normalize = false ) {
+ $string = $string->toString();
+ if ( $string == '' ) {
+ return false;
+ }
+
+ if ( $normalize ) {
+ $string = self::ccnorm( $string );
+ }
+
+ foreach ( $values as $needle ) {
+ $needle = $needle->toString();
+ if ( $normalize ) {
+ $needle = self::ccnorm( $needle );
+ }
+ if ( $needle === '' ) {
+ // T62203: Keep empty parameters from causing PHP warnings
+ continue;
+ }
+
+ $is_found = strpos( $string, $needle ) !== false;
+ if ( $is_found === $is_any ) {
+ // If I'm here and it's ANY (OR) it means that something is found.
+ // Just enough! Found!
+ // If I'm here and it's ALL (AND) it means that something isn't found.
+ // Just enough! Not found!
+ return $is_found;
+ }
+ }
+
+ // If I'm here and it's ANY (OR) it means that nothing was found:
+ // return false (because $is_any is true)
+ // If I'm here and it's ALL (AND) it means that everything were found:
+ // return true (because $is_any is false)
+ return ! $is_any;
+ }
+
+ /**
+ * @param string $s
+ * @return mixed
+ */
+ protected static function ccnorm( $s ) {
+ // Instatiate a single version of the equivset so the data is not loaded
+ // more than once.
+ if ( !self::$equivset ) {
+ self::$equivset = new Equivset();
+ }
+
+ return self::$equivset->normalize( $s );
+ }
+
+ /**
+ * @param string $s
+ * @return array|string
+ */
+ protected function rmspecials( $s ) {
+ return preg_replace( '/[^\p{L}\p{N}]/u', '', $s );
+ }
+
+ /**
+ * @param string $s
+ * @return array|string
+ */
+ protected function rmdoubles( $s ) {
+ return preg_replace( '/(.)\1+/us', '\1', $s );
+ }
+
+ /**
+ * @param string $s
+ * @return array|string
+ */
+ protected function rmwhitespace( $s ) {
+ return preg_replace( '/\s+/u', '', $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcRMSpecials( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'rmspecials', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = $this->rmspecials( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcRMWhitespace( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'rmwhitespace', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = $this->rmwhitespace( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcRMDoubles( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'rmdoubles', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = $this->rmdoubles( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcNorm( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'norm', 1, count( $args ) ]
+ );
+ }
+ $s = $args[0]->toString();
+
+ $s = $this->ccnorm( $s );
+ $s = $this->rmdoubles( $s );
+ $s = $this->rmspecials( $s );
+ $s = $this->rmwhitespace( $s );
+
+ return new AFPData( AFPData::DSTRING, $s );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcSubstr( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'substr', 2, count( $args ) ]
+ );
+ }
+
+ $s = $args[0]->toString();
+ $offset = $args[1]->toInt();
+
+ if ( isset( $args[2] ) ) {
+ $length = $args[2]->toInt();
+
+ $result = mb_substr( $s, $offset, $length );
+ } else {
+ $result = mb_substr( $s, $offset );
+ }
+
+ return new AFPData( AFPData::DSTRING, $result );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcStrPos( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'strpos', 2, count( $args ) ]
+ );
+ }
+
+ $haystack = $args[0]->toString();
+ $needle = $args[1]->toString();
+
+ // T62203: Keep empty parameters from causing PHP warnings
+ if ( $needle === '' ) {
+ return new AFPData( AFPData::DINT, -1 );
+ }
+
+ if ( isset( $args[2] ) ) {
+ $offset = $args[2]->toInt();
+
+ $result = mb_strpos( $haystack, $needle, $offset );
+ } else {
+ $result = mb_strpos( $haystack, $needle );
+ }
+
+ if ( $result === false ) {
+ $result = -1;
+ }
+
+ return new AFPData( AFPData::DINT, $result );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcStrReplace( $args ) {
+ if ( count( $args ) < 3 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'str_replace', 3, count( $args ) ]
+ );
+ }
+
+ $subject = $args[0]->toString();
+ $search = $args[1]->toString();
+ $replace = $args[2]->toString();
+
+ return new AFPData( AFPData::DSTRING, str_replace( $search, $replace, $subject ) );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function funcStrRegexEscape( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
+ [ 'rescape', 1, count( $args ) ] );
+ }
+
+ $string = $args[0]->toString();
+
+ // preg_quote does not need the second parameter, since rlike takes
+ // care of the delimiter symbol itself
+ return new AFPData( AFPData::DSTRING, preg_quote( $string ) );
+ }
+
+ /**
+ * @param array $args
+ * @return mixed
+ * @throws AFPUserVisibleException
+ */
+ protected function funcSetVar( $args ) {
+ if ( count( $args ) < 2 ) {
+ throw new AFPUserVisibleException(
+ 'notenoughargs',
+ $this->mCur->pos,
+ [ 'set_var', 2, count( $args ) ]
+ );
+ }
+
+ $varName = $args[0]->toString();
+ $value = $args[1];
+
+ $this->setUserVariable( $varName, $value );
+
+ return $value;
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function castString( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+ }
+ $val = $args[0];
+
+ return AFPData::castTypes( $val, AFPData::DSTRING );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function castInt( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+ }
+ $val = $args[0];
+
+ return AFPData::castTypes( $val, AFPData::DINT );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function castFloat( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+ }
+ $val = $args[0];
+
+ return AFPData::castTypes( $val, AFPData::DFLOAT );
+ }
+
+ /**
+ * @param array $args
+ * @return AFPData
+ * @throws AFPUserVisibleException
+ */
+ protected function castBool( $args ) {
+ if ( count( $args ) < 1 ) {
+ throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, [ __METHOD__ ] );
+ }
+ $val = $args[0];
+
+ return AFPData::castTypes( $val, AFPData::DBOOL );
+ }
+}