summaryrefslogtreecommitdiff
path: root/platform/www/inc/Parsing/Lexer/ParallelRegex.php
diff options
context:
space:
mode:
Diffstat (limited to 'platform/www/inc/Parsing/Lexer/ParallelRegex.php')
-rw-r--r--platform/www/inc/Parsing/Lexer/ParallelRegex.php203
1 files changed, 203 insertions, 0 deletions
diff --git a/platform/www/inc/Parsing/Lexer/ParallelRegex.php b/platform/www/inc/Parsing/Lexer/ParallelRegex.php
new file mode 100644
index 0000000..96f61a1
--- /dev/null
+++ b/platform/www/inc/Parsing/Lexer/ParallelRegex.php
@@ -0,0 +1,203 @@
+<?php
+/**
+ * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
+ * For an intro to the Lexer see:
+ * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
+ *
+ * @author Marcus Baker http://www.lastcraft.com
+ */
+
+namespace dokuwiki\Parsing\Lexer;
+
+/**
+ * Compounded regular expression.
+ *
+ * Any of the contained patterns could match, and when one does, its label is returned.
+ */
class ParallelRegex
{
    /** @var string[] raw patterns exactly as passed to addPattern(); never rewritten */
    protected $patterns;
    /** @var array labels for the patterns above (same indexes) */
    protected $labels;
    /** @var string|null cached compound regex matching all patterns; null when it must be rebuilt */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Adding a pattern invalidates the cached compound regex; it is rebuilt
     * from the raw patterns on the next match() or split() call.
     *
     * @param mixed $pattern Perl style regex. Must be UTF-8
     *                       encoded. If it's a string, the (, )
     *                       lose their meaning unless they
     *                       form part of a lookahead or
     *                       lookbehind assertion.
     * @param bool|string $label Label of regex to be returned
     *                           on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        // same index for both arrays so a label can be looked up by pattern position
        $index = count($this->patterns);
        $this->patterns[$index] = $pattern;
        $this->labels[$index] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject String to match against.
     * @param string $match First matched portion of
     *                      subject. Set to "" when nothing matched.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function match($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        return $this->getMatchedLabel($matches);
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject String to match against.
     * @param array $split The split result: array containing, pre-match, match & post-match strings.
     *                     When nothing matched it is set to array($subject, "", "").
     * @return bool|string False if no match found, label if label exists, true if not
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        // PREG_OFFSET_CAPTURE yields the position of the match, so the subject can be
        // cut directly instead of running a second regex over it
        if (!preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            $this->reportPregError();
            $split = array($subject, "", "");
            return false;
        }

        list($matched, $offset) = $matches[0];
        // the (string) casts cover PHP versions where substr() returns false at end of string
        $pre = (string) substr($subject, 0, $offset);
        $post = (string) substr($subject, $offset + strlen($matched));
        $split = array($pre, $matched, $post);

        return $this->getMatchedLabel($matches);
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The raw patterns are left untouched, so the compound regex can be
     * rebuilt safely after more patterns have been added.
     *
     * @return string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $groups = array();
            foreach ($this->patterns as $pattern) {
                // exactly one capturing group per pattern; its position identifies the label
                $groups[] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $groups) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }

    /**
     * Looks up the label belonging to the alternative that produced a match.
     *
     * preg_match() leaves out trailing groups that did not take part in the
     * match, so the last entry of $matches belongs to the matching pattern.
     * Index 0 is the full match, hence the offset of 2.
     *
     * @param array $matches Match array as filled by preg_match()
     * @return bool|string The label of the matching pattern, true if it has none
     */
    private function getMatchedLabel($matches)
    {
        $index = count($matches) - 2;
        return isset($this->labels[$index]) ? $this->labels[$index] : true;
    }

    /**
     * Escapes a single pattern for use inside the compound regex.
     *
     * Plain parentheses become literals, "(?" constructs keep their meaning
     * and "/" is escaped because it is the delimiter of the compound regex.
     *
     * @param mixed $pattern Raw pattern as given to addPattern()
     * @return string The escaped pattern, without a surrounding group
     */
    private function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/', $pattern, $elts);

        $escaped = "";
        $level = 0; // number of currently open "(?" constructs

        foreach ($elts[0] as $elt) {
            switch ($elt) {
                case '(':
                    $escaped .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $escaped .= '\\';
                    }
                    $escaped .= ')';
                    break;
                case '(?':
                    $level++;
                    $escaped .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) === '\\') {
                        // already an escape sequence, keep as is
                        $escaped .= $elt;
                    } else {
                        $escaped .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $escaped;
    }

    /**
     * Reports the last PCRE error, if any, through msg().
     *
     * A plain "no match" leaves preg_last_error() at PREG_NO_ERROR and
     * reports nothing.
     */
    private function reportPregError()
    {
        switch (preg_last_error()) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }
}