summary | refs | log | tree | commit | diff
path: root/www/wiki/extensions/RegexFunctions/RegexFunctions.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/RegexFunctions/RegexFunctions.php')
-rw-r--r-- www/wiki/extensions/RegexFunctions/RegexFunctions.php 228
1 files changed, 228 insertions, 0 deletions
diff --git a/www/wiki/extensions/RegexFunctions/RegexFunctions.php b/www/wiki/extensions/RegexFunctions/RegexFunctions.php
new file mode 100644
index 00000000..1fa34329
--- /dev/null
+++ b/www/wiki/extensions/RegexFunctions/RegexFunctions.php
@@ -0,0 +1,228 @@
+<?php
+/**
+ * RegexFunctions extension -- Regular Expression parser functions
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Ryan Schmidt
+ * @license http://en.wikipedia.org/wiki/Public_domain Public domain
+ * @link http://www.mediawiki.org/wiki/Extension:RegexFunctions Documentation
+ */
+
+// Refuse to run outside MediaWiki: this file only makes sense when loaded by the wiki.
+if ( !defined( 'MEDIAWIKI' ) ) {
+    echo "This file is an extension of the MediaWiki software and cannot be used standalone\n";
+    die( 1 );
+}
+
+// Extension credits that will show up on Special:Version
+$wgExtensionCredits['parserhook'][] = array(
+    'path' => __FILE__,
+    'name' => 'RegexFunctions',
+    'author' => 'Ryan Schmidt',
+    'version' => '1.5.0',
+    'descriptionmsg' => 'regexfunctions-desc',
+    'url' => 'https://www.mediawiki.org/wiki/Extension:RegexFunctions',
+);
+
+// Message sources. $dir keeps its trailing slash and stays a global, exactly as before;
+// it is now derived from __DIR__ like the line below it instead of dirname( __FILE__ ).
+$dir = __DIR__ . '/';
+$wgMessagesDirs['RegexFunctions'] = __DIR__ . '/i18n';
+$wgExtensionMessagesFiles['RegexFunctions'] = $dir . 'RegexFunctions.i18n.php';
+$wgExtensionMessagesFiles['RegexFunctionsMagic'] = $dir . 'RegexFunctions.i18n.magic.php';
+
+// Register the parser functions on first parser use, and reset the per-page
+// call counter whenever the parser state is cleared.
+$wgHooks['ParserFirstCallInit'][] = 'ExtRegexFunctions::onParserFirstCallInit';
+$wgHooks['ParserClearState'][] = 'ExtRegexFunctions::onParserClearState';
+
+// Default settings
+// How many regex function calls are allowed in a single page? Keep this above 3 for usability.
+$wgRegexFunctionsPerPage = 10;
+// Should we allow modifiers in the functions, e.g. the /i modifier for case-insensitive matching?
+$wgRegexFunctionsAllowModifiers = true;
+// Should we allow internal options to be set, e.g. (?opts) or (?opts:some regex)?
+$wgRegexFunctionsAllowOptions = true;
+// Limit passed to preg_split/preg_replace by rsplit and rreplace. -1 is unlimited.
+$wgRegexFunctionsLimit = -1;
+// Names of functions to disable ('rmatch', 'rsplit', 'rreplace'); a disabled function returns nothing.
+$wgRegexFunctionsDisable = array();
+
+/**
+ * Parser functions that expose PCRE matching, splitting and replacing to wikitext:
+ * {{#rmatch:}}, {{#rsplit:}} and {{#rreplace:}}.
+ *
+ * All three share one call counter, which is reset on ParserClearState and capped by
+ * $wgRegexFunctionsPerPage. Every user-supplied pattern goes through sanitize() before
+ * it reaches a preg_* function.
+ */
+class ExtRegexFunctions {
+    /** @var int Calls made to any of the three functions since the counter was last reset */
+    private static $num = 0;
+
+    /**
+     * @var string[] Trailing pattern modifiers that sanitize() will keep.
+     * The 'e' (eval) modifier is deliberately absent so it can never be passed on.
+     */
+    private static $modifiers = array(
+        'i', 'm', 's', 'x', 'A', 'D', 'S', 'U', 'X', 'J', 'u'
+    );
+
+    /** @var string[] Inline option letters that cleanupInternal() will keep */
+    private static $options = array( 'i', 'm', 's', 'x', 'U', 'X', 'J' );
+
+    /**
+     * ParserFirstCallInit hook: registers the three parser functions.
+     *
+     * @param Parser $parser
+     * @return bool Always true
+     */
+    public static function onParserFirstCallInit( $parser ) {
+        foreach ( array( 'rmatch', 'rsplit', 'rreplace' ) as $name ) {
+            $parser->setFunctionHook( $name, array( __CLASS__, $name ) );
+        }
+        return true;
+    }
+
+    /**
+     * ParserClearState hook: resets the shared call counter.
+     *
+     * @param Parser $parser
+     * @return bool Always true
+     */
+    public static function onParserClearState( $parser ) {
+        self::$num = 0;
+        return true;
+    }
+
+    /**
+     * Shared entry guard for the three parser functions.
+     *
+     * A disabled function is rejected without touching the counter; otherwise the
+     * call is counted and rejected once the per-page limit has been exceeded.
+     *
+     * @param string $function 'rmatch', 'rsplit' or 'rreplace'
+     * @return bool True if the call may proceed
+     */
+    private static function mayRun( $function ) {
+        global $wgRegexFunctionsPerPage, $wgRegexFunctionsDisable;
+        if ( in_array( $function, $wgRegexFunctionsDisable, true ) ) {
+            return false;
+        }
+        self::$num++;
+        return self::$num <= $wgRegexFunctionsPerPage;
+    }
+
+    /**
+     * {{#rmatch:string|pattern|return|notfound|offset}}
+     *
+     * Matches $pattern against $string and returns $return with capture references
+     * filled in: $N, \N and ${N} become the text of group N; $$N, \\N and $${N} become
+     * the offset of group N as reported by PREG_OFFSET_CAPTURE. References to groups
+     * that are not in the match array become the empty string.
+     *
+     * @param Parser &$parser
+     * @param string $string Subject text
+     * @param string $pattern Regular expression, with or without / delimiters
+     * @param string $return Template for the result
+     * @param string $notfound Returned when nothing matches; the literal '$0' means
+     *  "return $string unchanged"
+     * @param int|string $offset Start offset handed to preg_match (cast to int)
+     * @return string|null Null when the function is disabled, the per-page limit is
+     *  exceeded, or preg_match reports an error
+     */
+    public static function rmatch( &$parser, $string = '', $pattern = '', $return = '', $notfound = '', $offset = 0 ) {
+        global $wgRegexFunctionsAllowModifiers;
+        if ( !self::mayRun( 'rmatch' ) ) {
+            return;
+        }
+        $pattern = self::sanitize( $pattern, $wgRegexFunctionsAllowModifiers );
+        $num = preg_match(
+            $pattern, $string, $matches, PREG_OFFSET_CAPTURE, (int) $offset
+        );
+        if ( $num === false ) {
+            return;
+        }
+        if ( $num === 0 ) {
+            // '$0' as the not-found text asks for the original string back.
+            return $notfound === '$0' ? $string : $notfound;
+        }
+
+        // Temporarily rewrite every backslash as '%$' so that \N and $N can be handled
+        // by the same patterns; leftovers are turned back into backslashes at the end.
+        $return = str_replace( '\\', '%$', $return );
+
+        // Ordered passes: array( pattern, field ), where field indexes the
+        // array( text, offset ) pair of a capture. The doubled forms must run first,
+        // otherwise the single-$ patterns would consume part of them.
+        $passes = array(
+            array( '/%?\$%?\$([0-9]+)/', 1 ),
+            array( '/%?\$%?\$\{([0-9]+)\}/', 1 ),
+            array( '/%?\$([0-9]+)/', 0 ),
+            array( '/%?\$\{([0-9]+)\}/', 0 ),
+        );
+        foreach ( $passes as $pass ) {
+            list( $regex, $field ) = $pass;
+            $return = preg_replace_callback(
+                $regex,
+                function ( $ref ) use ( $matches, $field ) {
+                    return array_key_exists( $ref[1], $matches ) ? $matches[$ref[1]][$field] : '';
+                },
+                $return
+            );
+        }
+
+        return str_replace( '%$', '\\', $return );
+    }
+
+    /**
+     * {{#rsplit:string|pattern|piece}}
+     *
+     * Splits $string on $pattern and returns one piece. Negative piece numbers count
+     * from the end; out-of-range numbers are clamped to the first or last piece.
+     *
+     * @param Parser &$parser
+     * @param string $string Subject text
+     * @param string $pattern Regular expression, with or without / delimiters
+     * @param int|string $piece Zero-based index of the piece to return (cast to int)
+     * @return string|null Null when the function is disabled, the per-page limit is
+     *  exceeded, or preg_split fails (e.g. the pattern does not compile)
+     */
+    public static function rsplit( &$parser, $string = '', $pattern = '', $piece = 0 ) {
+        global $wgRegexFunctionsAllowModifiers, $wgRegexFunctionsLimit;
+        if ( !self::mayRun( 'rsplit' ) ) {
+            return;
+        }
+        $pattern = self::sanitize( $pattern, $wgRegexFunctionsAllowModifiers );
+        $res = preg_split( $pattern, $string, $wgRegexFunctionsLimit );
+        if ( $res === false ) {
+            // preg_split() failed; without this check count( false ) would throw on PHP 8.
+            return;
+        }
+        $total = count( $res );
+        $p = (int) $piece;
+        // allow negative pieces to work from the end of the array
+        if ( $p < 0 ) {
+            $p += $total;
+        }
+        // clamp pieces that don't exist to the nearest existing one
+        $p = max( 0, min( $p, $total - 1 ) );
+        return $res[$p];
+    }
+
+    /**
+     * {{#rreplace:string|pattern|replace}}
+     *
+     * Replaces matches of $pattern in $string with $replace, at most
+     * $wgRegexFunctionsLimit times.
+     *
+     * @param Parser &$parser
+     * @param string $string Subject text
+     * @param string $pattern Regular expression, with or without / delimiters
+     * @param string $replace Replacement, passed to preg_replace unchanged
+     * @return string|null Whatever preg_replace returns (null on a regex error);
+     *  also null when the function is disabled or the per-page limit is exceeded
+     */
+    public static function rreplace( &$parser, $string = '', $pattern = '', $replace = '' ) {
+        global $wgRegexFunctionsAllowModifiers, $wgRegexFunctionsLimit;
+        if ( !self::mayRun( 'rreplace' ) ) {
+            return;
+        }
+        // NUL bytes are removed from the pattern before sanitizing; only this function
+        // does so. NOTE(review): presumably a guard against modifier smuggling in
+        // preg_replace on old PHP versions -- confirm before removing.
+        $pattern = self::sanitize(
+            str_replace( chr( 0 ), '', $pattern ),
+            $wgRegexFunctionsAllowModifiers
+        );
+        return preg_replace( $pattern, $replace, $string, $wgRegexFunctionsLimit );
+    }
+
+    /**
+     * Sanitizes a user-supplied regex and returns it wrapped in / delimiters.
+     *
+     * If the input already looks like /body/mods, the body is kept and only the
+     * modifiers listed in self::$modifiers survive, and only when $m is true.
+     * Otherwise the whole input is treated as the body and unescaped slashes that
+     * follow another character are escaped.
+     *
+     * In both cases "(?...)" groups in the body are rewritten through cleanupInternal().
+     * NOTE(review): the rewritten group is emitted without the leading "?", so inline
+     * option / non-capturing / lookaround syntax does not survive as written. Confirm
+     * whether that is intended before relying on $wgRegexFunctionsAllowOptions.
+     *
+     * @param string $pattern Raw pattern from wikitext
+     * @param bool $m Whether trailing modifiers are allowed
+     * @return string Delimited pattern ready for the preg_* functions
+     */
+    private static function sanitize( $pattern, $m = false ) {
+        $optionGroup = '/([^\\\\])?\(\?(.*\:)?(.*)\)/U';
+        $rewriteGroup = function ( $found ) {
+            return "{$found[1]}(" . self::cleanupInternal( $found[2] ) . "{$found[3]})";
+        };
+
+        if ( !preg_match( '/^\/(.*)([^\\\\])\/(.*?)$/', $pattern, $matches ) ) {
+            // No usable /.../ delimiters: the whole input is the regex body.
+            $pat = preg_replace_callback( $optionGroup, $rewriteGroup, $pattern );
+            $pat = preg_replace( '!([^\\\\])/!', '$1\\/', $pat );
+            return '/' . $pat . '/';
+        }
+
+        $ret = '/' . preg_replace_callback( $optionGroup, $rewriteGroup, $matches[1] . $matches[2] ) . '/';
+        if ( $m ) {
+            // Keep only whitelisted modifiers, in whitelist order, each at most once.
+            foreach ( self::$modifiers as $val ) {
+                if ( strpos( $matches[3], $val ) !== false ) {
+                    $ret .= $val;
+                }
+            }
+        }
+        return $ret;
+    }
+
+    /**
+     * Cleans up internal options, keeping only the letters listed in self::$options.
+     *
+     * @param string $str Candidate option text
+     * @return string The allowed option letters found in $str, in whitelist order;
+     *  empty when $wgRegexFunctionsAllowOptions is false
+     */
+    private static function cleanupInternal( $str ) {
+        global $wgRegexFunctionsAllowOptions;
+        if ( !$wgRegexFunctionsAllowOptions ) {
+            return '';
+        }
+        $ret = '';
+        foreach ( self::$options as $opt ) {
+            if ( strpos( $str, $opt ) !== false ) {
+                $ret .= $opt;
+            }
+        }
+        return $ret;
+    }
+}