11 files changed, 4180 insertions, 0 deletions
diff --git a/platform/www/inc/Utf8/Asian.php b/platform/www/inc/Utf8/Asian.php
new file mode 100644
index 0000000..c7baa30
--- /dev/null
+++ b/platform/www/inc/Utf8/Asian.php
@@ -0,0 +1,99 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods and constants to handle Asian "words"
+ *
+ * This uses a crude regexp to determine which parts of an Asian string should be treated as words.
+ * This is necessary because in some Asian languages a single unicode char represents a whole idea
+ * without spaces separating them.
+ */
+class Asian
+{
+
+    /**
+     * This defines a non-capturing group for the use in regular expressions to match any asian character that
+     * needs to be treated as a word. Uses the Unicode-Ranges for Asian characters taken from
+     * http://en.wikipedia.org/wiki/Unicode_block
+     */
+    const REGEXP =
+        '(?:' .
+
+        '[\x{0E00}-\x{0E7F}]' . // Thai
+
+        '|' .
+
+        '[' .
+        '\x{2E80}-\x{3040}' .  // CJK -> Hangul
+        '\x{309D}-\x{30A0}' .
+        '\x{30FD}-\x{31EF}\x{3200}-\x{D7AF}' .
+        '\x{F900}-\x{FAFF}' .  // CJK Compatibility Ideographs
+        '\x{FE30}-\x{FE4F}' .  // CJK Compatibility Forms
+        "\xF0\xA0\x80\x80-\xF0\xAA\x9B\x9F" . // CJK Extension B
+        "\xF0\xAA\x9C\x80-\xF0\xAB\x9C\xBF" . // CJK Extension C
+        "\xF0\xAB\x9D\x80-\xF0\xAB\xA0\x9F" . // CJK Extension D
+        "\xF0\xAF\xA0\x80-\xF0\xAF\xAB\xBF" . // CJK Compatibility Supplement
+        ']' .
+
+        '|' .
+
+        '[' .                // Hiragana/Katakana (can be two characters)
+        '\x{3042}\x{3044}\x{3046}\x{3048}' .
+        '\x{304A}-\x{3062}\x{3064}-\x{3082}' .
+        '\x{3084}\x{3086}\x{3088}-\x{308D}' .
+        '\x{308F}-\x{3094}' .
+        '\x{30A2}\x{30A4}\x{30A6}\x{30A8}' .
+        '\x{30AA}-\x{30C2}\x{30C4}-\x{30E2}' .
+        '\x{30E4}\x{30E6}\x{30E8}-\x{30ED}' .
+        '\x{30EF}-\x{30F4}\x{30F7}-\x{30FA}' .
+        '][' .
+        '\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}' .
+        '\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}-\x{309C}' .
+        '\x{30A1}\x{30A3}\x{30A5}\x{30A7}\x{30A9}' .
+        '\x{30C3}\x{30E3}\x{30E5}\x{30E7}\x{30EE}\x{30F5}\x{30F6}\x{30FB}\x{30FC}' .
+        '\x{31F0}-\x{31FF}' .
+        ']?' .
+        ')';
+
+
+    /**
+     * Check if the given term contains Asian word characters
+     *
+     * @param string $term
+     * @return bool
+     */
+    public static function isAsianWords($term)
+    {
+        return (bool)preg_match('/' . self::REGEXP . '/u', $term);
+    }
+
+    /**
+     * Surround every Asian word in the given text with the given separator
+     *
+     * @param string $text Original text containing Asian words
+     * @param string $sep the separator placed before and after each match (default: one space)
+     * @return string Text with separated Asian words, or the unchanged input if the regexp fails
+     */
+    public static function separateAsianWords($text, $sep = ' ')
+    {
+        // handle asian chars as single words; warnings are silenced (may fail on older PHP version)
+        $asia = @preg_replace('/(' . self::REGEXP . ')/u', $sep . '\1' . $sep, $text);
+        if (!is_null($asia)) $text = $asia; // null signals a regexp failure: keep the input as is
+
+        return $text;
+    }
+
+    /**
+     * Split the given text into separate parts
+     *
+     * Each part is either a non-asian string, or a single asian word
+     *
+     * @param string $term
+     * @return string[]
+     */
+    public static function splitAsianWords($term)
+    {
+        return preg_split('/(' . self::REGEXP . '+)/u', $term, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
+    }
+}
diff --git a/platform/www/inc/Utf8/Clean.php b/platform/www/inc/Utf8/Clean.php
new file mode 100644
index 0000000..0975ff5
--- /dev/null
+++ b/platform/www/inc/Utf8/Clean.php
@@ -0,0 +1,204 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to assess and clean UTF-8 strings
+ */
+class Clean
+{
+    /**
+     * Checks if a string contains 7bit ASCII only
+     *
+     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
+     *
+     * @param string $str
+     * @return bool
+     */
+    public static function isASCII($str)
+    {
+        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
+    }
+
+    /**
+     * Tries to detect if a string is in UTF-8 encoding
+     *
+     * Only the byte structure is checked: overlong forms, surrogates and 5/6 byte sequences pass
+     * @author <bmorel@ssi.fr>
+     * @link   http://php.net/manual/en/function.utf8-encode.php
+     * @param string $str
+     * @return bool true if every lead byte is followed by the announced number of continuation bytes
+     */
+    public static function isUtf8($str)
+    {
+        $len = strlen($str);
+        for ($i = 0; $i < $len; $i++) {
+            $b = ord($str[$i]);
+            if ($b < 0x80) continue; # 0bbbbbbb
+            elseif (($b & 0xE0) === 0xC0) $n = 1; # 110bbbbb
+            elseif (($b & 0xF0) === 0xE0) $n = 2; # 1110bbbb
+            elseif (($b & 0xF8) === 0xF0) $n = 3; # 11110bbb
+            elseif (($b & 0xFC) === 0xF8) $n = 4; # 111110bb
+            elseif (($b & 0xFE) === 0xFC) $n = 5; # 1111110b
+            else return false; # Does not match any model
+
+            for ($j = 0; $j < $n; $j++) { # n bytes matching 10bbbbbb follow ?
+                if ((++$i === $len) || ((ord($str[$i]) & 0xC0) !== 0x80))
+                    return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Strips all high byte chars
+     *
+     * Every byte above 0x7F is dropped, so the result is a pure 7bit ASCII string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     * @param string $str
+     * @return string
+     */
+    public static function strip($str)
+    {
+        $ascii = '';
+        for ($pos = 0, $end = strlen($str); $pos < $end; $pos++) {
+            $byte = $str[$pos];
+            if (ord($byte) > 0x7F) {
+                continue; // lead or continuation byte of a multi-byte character
+            }
+            $ascii .= $byte;
+        }
+        return $ascii;
+    }
+
+    /**
+     * Removes special characters (nonalphanumeric) from a UTF-8 string
+     *
+     * This function adds the controlchars 0x00 to 0x19 to the array of
+     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param  string $string The UTF8 string to strip of special chars
+     * @param  string $repl Replace special with this string
+     * @param  string $additional Additional chars to strip (used in regexp char class)
+     * @return string
+     */
+    public static function stripspecials($string, $repl = '', $additional = '')
+    {
+        static $specials = null;
+        if ($specials === null) {
+            $specials = preg_quote(Table::specialChars(), '/');
+        }
+
+        return preg_replace('/[' . $additional . '\x00-\x19' . $specials . ']/u', $repl, $string);
+    }
+
+    /**
+     * Replace bad bytes with an alternative character
+     *
+     * ASCII character is recommended for replacement char
+     *
+     * PCRE Pattern to locate bad bytes in a UTF-8 string
+     * Comes from W3 FAQ: Multilingual Forms
+     * Note: modified to include full ASCII range including control chars
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see http://www.w3.org/International/questions/qa-forms-utf-8
+     *
+     * @param string $str to search
+     * @param string $replace to replace bad bytes with (defaults to '' which removes them) - use ASCII
+     * @return string
+     */
+    public static function replaceBadBytes($str, $replace = '')
+    {
+        $UTF8_BAD =
+            '([\x00-\x7F]' .                          # ASCII (including control chars)
+            '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
+            '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
+            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
+            '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
+            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
+            '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
+            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
+            '|(.{1}))';                               # invalid byte
+        // Every byte is consumed by exactly one alternative, so a single callback pass walks the
+        // whole string; group 2 only takes part in a match for the "invalid byte" alternative.
+        $clean = preg_replace_callback(
+            '/' . $UTF8_BAD . '/S',
+            static function ($match) use ($replace) {
+                return (isset($match[2]) && $match[2] !== '') ? $replace : $match[0];
+            },
+            $str
+        );
+        return (string)$clean;
+    }
+
+
+    /**
+     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
+     *
+     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
+     * letters. Default is to deaccent both cases ($case = 0)
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @param int $case
+     * @return string
+     */
+    public static function deaccent($string, $case = 0)
+    {
+        if ($case <= 0) {
+            $string = strtr($string, Table::lowerAccents());
+        }
+        if ($case >= 0) {
+            $string = strtr($string, Table::upperAccents());
+        }
+        return $string;
+    }
+
+    /**
+     * Romanize a non-latin string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @return string
+     */
+    public static function romanize($string)
+    {
+        if (self::isASCII($string)) return $string; //nothing to do
+
+        return strtr($string, Table::romanization());
+    }
+
+    /**
+     * adjust a byte index into a utf8 string to a utf8 character boundary
+     *
+     * @author       chris smith <chris@jalakai.co.uk>
+     *
+     * @param string $str utf8 character string
+     * @param int $i byte index into $str
+     * @param bool $next direction to search for boundary, false = up (current character) true = down (next character)
+     * @return int byte index into $str now pointing to a utf8 character boundary
+     */
+    public static function correctIdx($str, $i, $next = false)
+    {
+
+        if ($i <= 0) return 0;
+
+        $limit = strlen($str);
+        if ($i >= $limit) return $limit;
+
+        if ($next) {
+            while (($i < $limit) && ((ord($str[$i]) & 0xC0) === 0x80)) $i++;
+        } else {
+            while ($i && ((ord($str[$i]) & 0xC0) === 0x80)) $i--;
+        }
+
+        return $i;
+    }
+
+}
diff --git a/platform/www/inc/Utf8/Conversion.php b/platform/www/inc/Utf8/Conversion.php
new file mode 100644
index 0000000..fad9cd0
--- /dev/null
+++ b/platform/www/inc/Utf8/Conversion.php
@@ -0,0 +1,162 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to convert from and to UTF-8 strings
+ */
+class Conversion
+{
+
+    /**
+     * Encodes UTF-8 characters to HTML entities
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     * @author <vpribish at shopping dot com>
+     * @link   http://php.net/manual/en/function.utf8-decode.php
+     *
+     * @param string $str
+     * @param bool $all Encode 7bit ASCII chars as HTML entities as well
+     * @return string
+     */
+    public static function toHtml($str, $all = false)
+    {
+        $ret = '';
+        foreach (Unicode::fromUtf8($str) as $cp) {
+            if ($cp < 0x80 && !$all) {
+                $ret .= chr($cp);
+            } elseif ($cp < 0x100) {
+                $ret .= "&#$cp;";
+            } else {
+                $ret .= '&#x' . dechex($cp) . ';';
+            }
+        }
+        return $ret;
+    }
+
+    /**
+     * Decodes HTML entities to UTF-8 characters
+     *
+     * Convert any &#..; entity to a codepoint,
+     * The entities flag defaults to only decoding numeric entities.
+     * Pass HTML_ENTITIES and named entities, including &amp; &lt; etc.
+     * are handled as well. Avoids the problem that would occur if you
+     * had to decode "&amp;#38;&#38;amp;#38;"
+     *
+     * unhtmlspecialchars(\dokuwiki\Utf8\Conversion::fromHtml($s)) -> "&#38;&#38;"
+     * \dokuwiki\Utf8\Conversion::fromHtml(unhtmlspecialchars($s)) -> "&&amp#38;"
+     * what it should be                   -> "&#38;&amp#38;"
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $entities decode named entities in addition to numeric ones
+     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
+     */
+    public static function fromHtml($str, $entities = false)
+    {
+        if (!$entities) {
+            return preg_replace_callback(
+                '/(&#([Xx])?([0-9A-Za-z]+);)/m',
+                [__CLASS__, 'decodeNumericEntity'],
+                $str
+            );
+        }
+
+        return preg_replace_callback(
+            '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
+            [__CLASS__, 'decodeAnyEntity'],
+            $str
+        );
+    }
+
+    /**
+     * Decodes any HTML entity to its correct UTF-8 char equivalent
+     *
+     * Callback for fromHtml(); numeric entities are handed on to decodeNumericEntity()
+     *
+     * @param string[] $ent Regexp match: [0] is the whole entity, [1] is '#' for numeric entities
+     * @return string the decoded character; unknown named entities are returned unchanged
+     */
+    protected static function decodeAnyEntity($ent)
+    {
+        // create the named entity lookup table ('&name;' => UTF-8 character)
+        static $table = null;
+        if ($table === null) {
+            // Request the UTF-8 table explicitly and use its characters as they are. They
+            // may be multi-byte, so passing them through ord() (which only reads the first
+            // byte) would turn e.g. &eacute; into a wrong character.
+            $table = array_flip(
+                get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8')
+            );
+        }
+
+        if ($ent[1] === '#') {
+            return self::decodeNumericEntity($ent);
+        }
+
+        if (array_key_exists($ent[0], $table)) {
+            return $table[$ent[0]];
+        }
+
+        return $ent[0];
+    }
+
+    /**
+     * Turn a matched numeric HTML entity into the UTF-8 character it stands for
+     *
+     * @param string[] $ent Regexp match: [2] is 'x' or 'X' for hexadecimal notation, [3] holds the digits
+     * @return string|false
+     */
+    protected static function decodeNumericEntity($ent)
+    {
+        $digits = $ent[3];
+
+        if ($ent[2] === 'X' || $ent[2] === 'x') {
+            // &#x...; notation: the digits are hexadecimal
+            $codepoint = hexdec($digits);
+        } else {
+            // &#...; notation: the digits are decimal
+            $codepoint = intval($digits);
+        }
+        return Unicode::toUtf8(array($codepoint));
+    }
+
+    /**
+     * UTF-8 to UTF-16BE conversion.
+     *
+     * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits
+     *
+     * @param string $str
+     * @param bool $bom
+     * @return string
+     */
+    public static function toUtf16be($str, $bom = false)
+    {
+        $out = $bom ? "\xFE\xFF" : '';
+        if (UTF8_MBSTRING) {
+            return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
+        }
+
+        $uni = Unicode::fromUtf8($str);
+        foreach ($uni as $cp) {
+            $out .= pack('n', $cp);
+        }
+        return $out;
+    }
+
+    /**
+     * UTF-16BE to UTF-8 conversion.
+     *
+     * Each 16 bit unit is unpacked as one code point, so maybe really UCS-2 (surrogate handling not visible here)
+     *
+     * @param string $str
+     * @return false|string
+     */
+    public static function fromUtf16be($str)
+    {
+        $uni = unpack('n*', $str);
+        return Unicode::toUtf8($uni);
+    }
+
+}
diff --git a/platform/www/inc/Utf8/PhpString.php b/platform/www/inc/Utf8/PhpString.php
new file mode 100644
index 0000000..5bcd601
--- /dev/null
+++ b/platform/www/inc/Utf8/PhpString.php
@@ -0,0 +1,383 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * UTF-8 aware equivalents to PHP's string functions
+ */
+class PhpString
+{
+
+    /**
+     * A locale independent basename() implementation
+     *
+     * Works around a bug in PHP's own basename() (see the linked report).
+     * Both '/' and '\' count as directory separators; separators at either end of
+     * $path are ignored.
+     *
+     * @param string $path A path
+     * @param string $suffix If the name component ends in suffix this will also be cut off
+     * @return string
+     * @link   https://bugs.php.net/bug.php?id=37738
+     *
+     * @see basename()
+     */
+    public static function basename($path, $suffix = '')
+    {
+        // drop surrounding separators, then keep what follows the last remaining one
+        $segments = preg_split('#[\\\\/]#', trim($path, '\\/'));
+        $name = end($segments);
+
+        $cut = strlen($suffix);
+        if ($cut === 0 || substr($name, -$cut) !== $suffix) {
+            return $name;
+        }
+
+        return substr($name, 0, -$cut);
+    }
+
+    /**
+     * Unicode aware replacement for strlen()
+     *
+     * Prefers mb_strlen() and iconv_strlen(). utf8_decode() is kept as a fallback only:
+     * it converts characters that are not in ISO-8859-1 to '?', which is alright for
+     * counting, but the function is deprecated as of PHP 8.2.
+     *
+     * @param string $string
+     * @return int
+     * @see    utf8_decode()
+     * @author <chernyshevsky at hotmail dot com>
+     * @see    strlen()
+     */
+    public static function strlen($string)
+    {
+        if (UTF8_MBSTRING) {
+            return mb_strlen($string, 'UTF-8');
+        }
+
+        if (function_exists('iconv_strlen')) {
+            return iconv_strlen($string, 'UTF-8');
+        }
+
+        // last resort only: utf8_decode() is deprecated as of PHP 8.2
+        if (function_exists('utf8_decode')) {
+            return strlen(utf8_decode($string));
+        }
+
+        return strlen($string);
+    }
+
+    /**
+     * UTF-8 aware alternative to substr
+     *
+     * Return part of a string given character offset (and optionally length)
+     *
+     * @param string $str
+     * @param int $offset number of UTF-8 characters offset (from left)
+     * @param int $length (optional) length in UTF-8 characters from offset
+     * @return string
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @author Chris Smith <chris@jalakai.co.uk>
+     *
+     */
+    public static function substr($str, $offset, $length = null)
+    {
+        if (UTF8_MBSTRING) {
+            if ($length === null) {
+                return mb_substr($str, $offset);
+            }
+
+            return mb_substr($str, $offset, $length);
+        }
+
+        /*
+         * Notes:
+         *
+         * no mb string support, so we'll use pcre regex's with 'u' flag
+         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
+         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
+         *
+         * substr documentation states false can be returned in some cases (e.g. offset > string length)
+         * mb_substr never returns false, it will return an empty string instead.
+         *
+         * calculating the number of characters in the string is a relatively expensive operation, so
+         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
+         */
+
+        // cast parameters to appropriate types to avoid multiple notices/warnings
+        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
+        $offset = (int)$offset;
+        if ($length !== null) $length = (int)$length;
+
+        // handle trivial cases
+        if ($length === 0) return '';
+        if ($offset < 0 && $length < 0 && $length < $offset) return '';
+
+        $offset_pattern = '';
+        $length_pattern = '';
+
+        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
+        if ($offset < 0) {
+            $strlen = self::strlen($str);        // see notes
+            $offset = $strlen + $offset;
+            if ($offset < 0) $offset = 0;
+        }
+
+        // establish a pattern for offset, a non-captured group equal in length to offset
+        if ($offset > 0) {
+            $Ox = (int)($offset / 65535);
+            $Oy = $offset % 65535;
+
+            if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
+            $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
+        } else {
+            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
+        }
+
+        // establish a pattern for length
+        if ($length === null) {
+            $length_pattern = '(.*)$';                  // the rest of the string
+        } else {
+
+            if (!isset($strlen)) $strlen = self::strlen($str);    // see notes
+            if ($offset > $strlen) return '';           // another trivial case
+
+            if ($length > 0) {
+
+                // reduce any length that would go past the end of the string
+                $length = min($strlen - $offset, $length);
+
+                $Lx = (int)($length / 65535);
+                $Ly = $length % 65535;
+
+                // +ve length requires ... a captured group of length characters
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
+
+            } else if ($length < 0) {
+
+                if ($length < ($offset - $strlen)) return '';
+
+                $Lx = (int)((-$length) / 65535);
+                $Ly = (-$length) % 65535;
+
+                // -ve length requires ... capture everything except a group of -length characters
+                //                         anchored at the tail-end of the string
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
+            }
+        }
+
+        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
+        return $match[1];
+    }
+
+    // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
+    /**
+     * Unicode aware replacement for substr_replace()
+     *
+     * @param string $string input string
+     * @param string $replacement the replacement
+     * @param int $start the replacing will begin at the start'th offset into string.
+     * @param int $length If given and is positive, it represents the length of the portion of string which is
+     *                            to be replaced. If length is zero then this function will have the effect of inserting
+     *                            replacement into string at the given start offset.
+     * @return string
+     * @see    substr_replace()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function substr_replace($string, $replacement, $start, $length = 0)
+    {
+        $ret = '';
+        if ($start > 0) $ret .= self::substr($string, 0, $start);
+        $ret .= $replacement;
+        $ret .= self::substr($string, $start + $length);
+        return $ret;
+    }
+    // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
+
+    /**
+     * Unicode aware replacement for ltrim()
+     *
+     * @param string $str
+     * @param string $charlist
+     * @return string
+     * @see    ltrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function ltrim($str, $charlist = '')
+    {
+        if ($charlist === '') return ltrim($str);
+
+        //quote charlist for use in a characterclass
+        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\${1}', $charlist);
+
+        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for rtrim()
+     *
+     * @param string $str
+     * @param string $charlist
+     * @return string
+     * @see    rtrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function rtrim($str, $charlist = '')
+    {
+        if ($charlist === '') return rtrim($str);
+
+        //quote charlist for use in a characterclass
+        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!', '\\\${1}', $charlist);
+
+        return preg_replace('/[' . $charlist . ']+$/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for trim()
+     *
+     * @param string $str
+     * @param string $charlist
+     * @return string
+     * @see    trim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function trim($str, $charlist = '')
+    {
+        if ($charlist === '') return trim($str);
+
+        return self::ltrim(self::rtrim($str, $charlist), $charlist);
+    }
+
+    /**
+     * This is a unicode aware replacement for strtolower()
+     *
+     * With mbstring the result is additionally passed through Normalizer::normalize() if
+     * the Normalizer class is already loaded; without mbstring the case table is used
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtoupper()
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtolower()
+     */
+    public static function strtolower($string)
+    {
+        if (!UTF8_MBSTRING) {
+            return strtr($string, Table::upperCaseToLowerCase());
+        }
+
+        $lowered = mb_strtolower($string, 'utf-8');
+        // second argument false: never trigger autoloading just to look for the class
+        return class_exists('Normalizer', false) ? \Normalizer::normalize($lowered) : $lowered;
+    }
+
+    /**
+     * This is a unicode aware replacement for strtoupper()
+     *
+     * Uses mb_string extension if available
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtolower()
+     *
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtoupper()
+     */
+    public static function strtoupper($string)
+    {
+        if (UTF8_MBSTRING) return mb_strtoupper($string, 'utf-8');
+
+        return strtr($string, Table::lowerCaseToUpperCase());
+    }
+
+
+    /**
+     * UTF-8 aware alternative to ucfirst
+     * Make a string's first character uppercase
+     *
+     * @param string $str
+     * @return string with first character as upper case (if applicable)
+     * @author Harry Fuecks
+     *
+     */
+    public static function ucfirst($str)
+    {
+        switch (self::strlen($str)) {
+            case 0:
+                return '';
+            case 1:
+                return self::strtoupper($str);
+            default:
+                preg_match('/^(.{1})(.*)$/us', $str, $matches);
+                return self::strtoupper($matches[1]) . $matches[2];
+        }
+    }
+
+    /**
+     * UTF-8 aware alternative to ucwords
+     * Uppercase the first character of each word in a string
+     *
+     * @param string $str
+     * @return string with first char of each word uppercase
+     * @author Harry Fuecks
+     * @see http://php.net/ucwords
+     *
+     */
+    public static function ucwords($str)
+    {
+        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
+        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
+        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
+        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
+
+        return preg_replace_callback(
+            $pattern,
+            function ($matches) {
+                $leadingws = $matches[2];
+                $ucfirst = self::strtoupper($matches[3]);
+                $ucword = self::substr_replace(ltrim($matches[0]), $ucfirst, 0, 1);
+                return $leadingws . $ucword;
+            },
+            $str
+        );
+    }
+
+    /**
+     * This is an Unicode aware replacement for strpos
+     *
+     * @param string $haystack
+     * @param string $needle
+     * @param integer $offset
+     * @return integer|false character offset of the match, false if $needle was not found
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strpos()
+     *
+     */
+    public static function strpos($haystack, $needle, $offset = 0)
+    {
+        $comp = 0;
+        $length = null;
+
+        while ($length === null || $length < $offset) {
+            $pos = strpos($haystack, $needle, $offset + $comp);
+
+            if ($pos === false)
+                return false;
+
+            $length = self::strlen(substr($haystack, 0, $pos));
+
+            if ($length < $offset)
+                $comp = $pos - $length;
+        }
+
+        return $length;
+    }
+
+
+}
diff --git a/platform/www/inc/Utf8/Table.php b/platform/www/inc/Utf8/Table.php
new file mode 100644
index 0000000..8683c92
--- /dev/null
+++ b/platform/www/inc/Utf8/Table.php
@@ -0,0 +1,93 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Provides static access to the UTF-8 conversion tables
+ *
+ * Lazy-Loads tables on first access
+ */
+class Table
+{
+
+    /**
+     * Get the upper to lower case conversion table
+     *
+     * @return array
+     */
+    public static function upperCaseToLowerCase()
+    {
+        static $table = null;
+        if ($table === null) $table = include __DIR__ . '/tables/case.php';
+        return $table;
+    }
+
+    /**
+     * Get the lower to upper case conversion table
+     *
+     * @return array
+     */
+    public static function lowerCaseToUpperCase()
+    {
+        static $table = null;
+        if ($table === null) {
+            $uclc = self::upperCaseToLowerCase();
+            $table = array_flip($uclc);
+        }
+        return $table;
+    }
+
+    /**
+     * Get the lower case accent table
+     * @return array
+     */
+    public static function lowerAccents()
+    {
+        static $table = null;
+        if ($table === null) {
+            $table = include __DIR__ . '/tables/loweraccents.php';
+        }
+        return $table;
+    }
+
+    /**
+     * Get the upper case accent table
+     * @return array
+     */
+    public static function upperAccents()
+    {
+        static $table = null;
+        if ($table === null) {
+            $table = include __DIR__ . '/tables/upperaccents.php';
+        }
+        return $table;
+    }
+
+    /**
+     * Get the romanization table
+     * @return array
+     */
+    public static function romanization()
+    {
+        static $table = null;
+        if ($table === null) {
+            $table = include __DIR__ . '/tables/romanization.php';
+        }
+        return $table;
+    }
+
+    /**
+     * Get the special chars as a concatenated string
+     * @return string
+     */
+    public static function specialChars()
+    {
+        static $string = null;
+        if ($string === null) {
+            $table = include __DIR__ . '/tables/specials.php';
+            // FIXME should we cache this to file system?
+            $string = Unicode::toUtf8($table);
+        }
+        return $string;
+    }
+}
diff --git a/platform/www/inc/Utf8/Unicode.php b/platform/www/inc/Utf8/Unicode.php
new file mode 100644
index 0000000..4b64265
--- /dev/null
+++ b/platform/www/inc/Utf8/Unicode.php
@@ -0,0 +1,277 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Convert between UTF-8 and a list of Unicode Code Points
+ */
+class Unicode
+{
+
+    /**
+     * Takes an UTF-8 string and returns an array of ints representing the
+     * Unicode characters. Astral planes are supported ie. the ints in the
+     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
+     * are not allowed.
+     *
+     * If $strict is true, invalid input raises an E_USER_WARNING and false is
+     * returned. Otherwise bytes that fit nowhere are skipped and illegal code
+     * points (overlong forms, surrogates, > 0x10FFFF) are returned as decoded.
+     *
+     * Note: this function has been modified slightly in this library to
+     * trigger errors on encountering bad bytes
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    Unicode::toUtf8()
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo break into less complex chunks
+     * @todo use exceptions instead of user errors
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return array|false array of unicode code points or false if UTF-8 invalid ($strict only)
+     */
+    public static function fromUtf8($str, $strict = false)
+    {
+        $mState = 0;     // cached expected number of octets after the current octet
+        // until the beginning of the next UTF8 character sequence
+        $mUcs4 = 0;     // cached Unicode character
+        $mBytes = 1;     // cached expected number of octets in the current sequence
+
+        $out = [];
+
+        $len = strlen($str);
+
+        for ($i = 0; $i < $len; $i++) {
+            $in = ord($str[$i]);
+
+            if ($mState === 0) {
+
+                // When mState is zero we expect either a US-ASCII character or a
+                // multi-octet sequence.
+                if (0 === (0x80 & $in)) {
+                    // US-ASCII, pass straight through.
+                    $out[] = $in;
+                    $mBytes = 1;
+
+                } elseif (0xC0 === (0xE0 & $in)) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+
+                } elseif (0xE0 === (0xF0 & $in)) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+
+                } elseif (0xF0 === (0xF8 & $in)) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+
+                } elseif (0xF8 === (0xFC & $in)) {
+                    /* First octet of 5 octet sequence.
+                     *
+                     * This is illegal because the encoded codepoint must be either
+                     * (a) not the shortest form or
+                     * (b) outside the Unicode range of 0-0x10FFFF.
+                     * Rather than trying to resynchronize, we will carry on until the end
+                     * of the sequence and let the later error handling code catch it.
+                     */
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+
+                } elseif (0xFC === (0xFE & $in)) {
+                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
+                    $mUcs4 = $in;
+                    $mUcs4 = ($mUcs4 & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+
+                } elseif ($strict) {
+                    /* Current octet is neither in the US-ASCII range nor a legal first
+                     * octet of a multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Illegal sequence identifier ' .
+                        'in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+                    return false;
+
+                }
+
+            } else {
+
+                // When mState is non-zero, we expect a continuation of the multi-octet
+                // sequence
+                if (0x80 === (0xC0 & $in)) {
+
+                    // Legal continuation: add its 6 payload bits at the position given by mState
+                    $shift = ($mState - 1) * 6;
+                    $tmp = $in;
+                    $tmp = ($tmp & 0x0000003F) << $shift;
+                    $mUcs4 |= $tmp;
+
+                    /**
+                     * End of the multi-octet sequence. mUcs4 now contains the final
+                     * Unicode codepoint to be output
+                     */
+                    if (0 === --$mState) {
+
+                        /*
+                         * Check for illegal sequences and codepoints.
+                         */
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters are illegal
+                            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)) {
+
+                            if ($strict) {
+                                trigger_error(
+                                    'utf8_to_unicode: Illegal sequence or codepoint ' .
+                                    'in UTF-8 at byte ' . $i,
+                                    E_USER_WARNING
+                                );
+
+                                return false;
+                            }
+
+                        }
+                        // non-strict mode falls through here and keeps even an illegal code point
+                        if (0xFEFF !== $mUcs4) {
+                            // BOM is legal but we don't want to output it
+                            $out[] = $mUcs4;
+                        }
+
+                        //initialize UTF8 cache
+                        $mState = 0;
+                        $mUcs4 = 0;
+                        $mBytes = 1;
+                    }
+
+                } elseif ($strict) {
+                    /**
+                     *((0xC0 & (*in) != 0x80) && (mState != 0))
+                     * Incomplete multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Incomplete multi-octet ' .
+                        '   sequence in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+
+                    return false;
+                }
+            }
+        }
+        // NOTE: a sequence cut off by the end of $str ($mState !== 0) is dropped silently
+        return $out;
+    }
+
+    /**
+     * Takes an array of ints representing the Unicode characters and returns
+     * a UTF-8 string. Astral planes are supported ie. the ints in the
+     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
+     * are not allowed.
+     *
+     * If $strict is set to true the function returns false if the input
+     * array contains ints that represent surrogates or are outside the
+     * Unicode range and raises a PHP error at level E_USER_WARNING.
+     *
+     * Without $strict such values are skipped. The result is assembled in a
+     * local string, so an early strict-mode return never leaves an output
+     * buffer open. A non-array $arr yields an empty string.
+     *
+     * @param  array $arr of unicode code points representing a string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return string|false UTF-8 string or false if array contains invalid code points
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    Unicode::fromUtf8()
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo use exceptions instead of user errors
+     */
+    public static function toUtf8($arr, $strict = false)
+    {
+        if (!is_array($arr)) return '';
+
+        // Collect the bytes in a plain string. The former ob_start()/echo approach
+        // never closed its buffer when strict mode returned false half way through,
+        // which left the buffer level raised and the partial output pending for
+        // whoever flushed next.
+        $utf8 = '';
+
+        foreach ($arr as $k => $cp) {
+
+            if ($cp < 0 || $cp > 0x10ffff) {
+                # Outside the Unicode range. Negative values are caught here as well,
+                # they used to fall into the 2 byte branch and produce garbage bytes.
+                if ($strict) {
+                    trigger_error(
+                        'unicode_to_utf8: Codepoint out of Unicode range ' .
+                        'at index: ' . $k . ', value: ' . $cp,
+                        E_USER_WARNING
+                    );
+                    return false;
+                }
+
+            } elseif ($cp <= 0x007f) {
+                # ASCII range (including control chars)
+                $utf8 .= chr($cp);
+
+            } elseif ($cp <= 0x07ff) {
+                # 2 byte sequence
+                $utf8 .= chr(0xc0 | ($cp >> 6));
+                $utf8 .= chr(0x80 | ($cp & 0x003f));
+
+            } elseif ($cp == 0xFEFF) {
+                # Byte order mark (skip)
+                // nop -- zap the BOM
+
+            } elseif ($cp >= 0xD800 && $cp <= 0xDFFF) {
+                # Surrogates are not valid code points, nothing is emitted for them
+                if ($strict) {
+                    trigger_error(
+                        'unicode_to_utf8: Illegal surrogate ' .
+                        'at index: ' . $k . ', value: ' . $cp,
+                        E_USER_WARNING
+                    );
+                    return false;
+                }
+
+            } elseif ($cp <= 0xffff) {
+                # 3 byte sequence
+                $utf8 .= chr(0xe0 | ($cp >> 12));
+                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
+                $utf8 .= chr(0x80 | ($cp & 0x003f));
+
+            } else {
+                # 4 byte sequence (everything left is <= 0x10ffff)
+                $utf8 .= chr(0xf0 | ($cp >> 18));
+                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
+                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
+                $utf8 .= chr(0x80 | ($cp & 0x3f));
+            }
+        }
+
+        return $utf8;
+    }
+}
diff --git a/platform/www/inc/Utf8/tables/case.php b/platform/www/inc/Utf8/tables/case.php
new file mode 100644
index 0000000..6c41b58
--- /dev/null
+++ b/platform/www/inc/Utf8/tables/case.php
@@ -0,0 +1,659 @@
+<?php
+/**
+ * UTF-8 Case lookup table
+ *
+ * This lookup table maps upper case letters (keys) to their corresponding
+ * lower case letters (values) in UTF-8
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+return [
+    'A' => 'a',
+    'B' => 'b',
+    'C' => 'c',
+    'D' => 'd',
+    'E' => 'e',
+    'F' => 'f',
+    'G' => 'g',
+    'H' => 'h',
+    'I' => 'i',
+    'J' => 'j',
+    'K' => 'k',
+    'L' => 'l',
+    'M' => 'm',
+    'N' => 'n',
+    'O' => 'o',
+    'P' => 'p',
+    'Q' => 'q',
+    'R' => 'r',
+    'S' => 's',
+    'T' => 't',
+    'U' => 'u',
+    'V' => 'v',
+    'W' => 'w',
+    'X' => 'x',
+    'Y' => 'y',
+    'Z' => 'z',
+    'À' => 'à',
+    'Á' => 'á',
+    'Â' => 'â',
+    'Ã' => 'ã',
+    'Ä' => 'ä',
+    'Å' => 'å',
+    'Æ' => 'æ',
+    'Ç' => 'ç',
+    'È' => 'è',
+    'É' => 'é',
+    'Ê' => 'ê',
+    'Ë' => 'ë',
+    'Ì' => 'ì',
+    'Í' => 'í',
+    'Î' => 'î',
+    'Ï' => 'ï',
+    'Ð' => 'ð',
+    'Ñ' => 'ñ',
+    'Ò' => 'ò',
+    'Ó' => 'ó',
+    'Ô' => 'ô',
+    'Õ' => 'õ',
+    'Ö' => 'ö',
+    'Ø' => 'ø',
+    'Ù' => 'ù',
+    'Ú' => 'ú',
+    'Û' => 'û',
+    'Ü' => 'ü',
+    'Ý' => 'ý',
+    'Þ' => 'þ',
+    'Ā' => 'ā',
+    'Ă' => 'ă',
+    'Ą' => 'ą',
+    'Ć' => 'ć',
+    'Ĉ' => 'ĉ',
+    'Ċ' => 'ċ',
+    'Č' => 'č',
+    'Ď' => 'ď',
+    'Đ' => 'đ',
+    'Ē' => 'ē',
+    'Ĕ' => 'ĕ',
+    'Ė' => 'ė',
+    'Ę' => 'ę',
+    'Ě' => 'ě',
+    'Ĝ' => 'ĝ',
+    'Ğ' => 'ğ',
+    'Ġ' => 'ġ',
+    'Ģ' => 'ģ',
+    'Ĥ' => 'ĥ',
+    'Ħ' => 'ħ',
+    'Ĩ' => 'ĩ',
+    'Ī' => 'ī',
+    'Ĭ' => 'ĭ',
+    'Į' => 'į',
+    'Ĳ' => 'ĳ',
+    'Ĵ' => 'ĵ',
+    'Ķ' => 'ķ',
+    'Ĺ' => 'ĺ',
+    'Ļ' => 'ļ',
+    'Ľ' => 'ľ',
+    'Ŀ' => 'ŀ',
+    'Ł' => 'ł',
+    'Ń' => 'ń',
+    'Ņ' => 'ņ',
+    'Ň' => 'ň',
+    'Ŋ' => 'ŋ',
+    'Ō' => 'ō',
+    'Ŏ' => 'ŏ',
+    'Ő' => 'ő',
+    'Œ' => 'œ',
+    'Ŕ' => 'ŕ',
+    'Ŗ' => 'ŗ',
+    'Ř' => 'ř',
+    'Ś' => 'ś',
+    'Ŝ' => 'ŝ',
+    'Ş' => 'ş',
+    'Š' => 'š',
+    'Ţ' => 'ţ',
+    'Ť' => 'ť',
+    'Ŧ' => 'ŧ',
+    'Ũ' => 'ũ',
+    'Ū' => 'ū',
+    'Ŭ' => 'ŭ',
+    'Ů' => 'ů',
+    'Ű' => 'ű',
+    'Ų' => 'ų',
+    'Ŵ' => 'ŵ',
+    'Ŷ' => 'ŷ',
+    'Ÿ' => 'ÿ',
+    'Ź' => 'ź',
+    'Ż' => 'ż',
+    'Ž' => 'ž',
+    'Ɓ' => 'ɓ',
+    'Ƃ' => 'ƃ',
+    'Ƅ' => 'ƅ',
+    'Ɔ' => 'ɔ',
+    'Ƈ' => 'ƈ',
+    'Ɖ' => 'ɖ',
+    'Ɗ' => 'ɗ',
+    'Ƌ' => 'ƌ',
+    'Ǝ' => 'ǝ',
+    'Ə' => 'ə',
+    'Ɛ' => 'ɛ',
+    'Ƒ' => 'ƒ',
+    'Ɣ' => 'ɣ',
+    'Ɩ' => 'ɩ',
+    'Ɨ' => 'ɨ',
+    'Ƙ' => 'ƙ',
+    'Ɯ' => 'ɯ',
+    'Ɲ' => 'ɲ',
+    'Ɵ' => 'ɵ',
+    'Ơ' => 'ơ',
+    'Ƣ' => 'ƣ',
+    'Ƥ' => 'ƥ',
+    'Ʀ' => 'ʀ',
+    'Ƨ' => 'ƨ',
+    'Ʃ' => 'ʃ',
+    'Ƭ' => 'ƭ',
+    'Ʈ' => 'ʈ',
+    'Ư' => 'ư',
+    'Ʊ' => 'ʊ',
+    'Ʋ' => 'ʋ',
+    'Ƴ' => 'ƴ',
+    'Ƶ' => 'ƶ',
+    'Ʒ' => 'ʒ',
+    'Ƹ' => 'ƹ',
+    'Ƽ' => 'ƽ',
+    'ǅ' => 'ǆ',
+    'ǈ' => 'ǉ',
+    'ǋ' => 'ǌ',
+    'Ǎ' => 'ǎ',
+    'Ǐ' => 'ǐ',
+    'Ǒ' => 'ǒ',
+    'Ǔ' => 'ǔ',
+    'Ǖ' => 'ǖ',
+    'Ǘ' => 'ǘ',
+    'Ǚ' => 'ǚ',
+    'Ǜ' => 'ǜ',
+    'Ǟ' => 'ǟ',
+    'Ǡ' => 'ǡ',
+    'Ǣ' => 'ǣ',
+    'Ǥ' => 'ǥ',
+    'Ǧ' => 'ǧ',
+    'Ǩ' => 'ǩ',
+    'Ǫ' => 'ǫ',
+    'Ǭ' => 'ǭ',
+    'Ǯ' => 'ǯ',
+    'ǲ' => 'ǳ',
+    'Ǵ' => 'ǵ',
+    'Ƕ' => 'ƕ',
+    'Ƿ' => 'ƿ',
+    'Ǹ' => 'ǹ',
+    'Ǻ' => 'ǻ',
+    'Ǽ' => 'ǽ',
+    'Ǿ' => 'ǿ',
+    'Ȁ' => 'ȁ',
+    'Ȃ' => 'ȃ',
+    'Ȅ' => 'ȅ',
+    'Ȇ' => 'ȇ',
+    'Ȉ' => 'ȉ',
+    'Ȋ' => 'ȋ',
+    'Ȍ' => 'ȍ',
+    'Ȏ' => 'ȏ',
+    'Ȑ' => 'ȑ',
+    'Ȓ' => 'ȓ',
+    'Ȕ' => 'ȕ',
+    'Ȗ' => 'ȗ',
+    'Ș' => 'ș',
+    'Ț' => 'ț',
+    'Ȝ' => 'ȝ',
+    'Ȟ' => 'ȟ',
+    'Ƞ' => 'ƞ',
+    'Ȣ' => 'ȣ',
+    'Ȥ' => 'ȥ',
+    'Ȧ' => 'ȧ',
+    'Ȩ' => 'ȩ',
+    'Ȫ' => 'ȫ',
+    'Ȭ' => 'ȭ',
+    'Ȯ' => 'ȯ',
+    'Ȱ' => 'ȱ',
+    'Ȳ' => 'ȳ',
+    'Ά' => 'ά',
+    'Έ' => 'έ',
+    'Ή' => 'ή',
+    'Ί' => 'ί',
+    'Ό' => 'ό',
+    'Ύ' => 'ύ',
+    'Ώ' => 'ώ',
+    'Α' => 'α',
+    'Β' => 'β',
+    'Γ' => 'γ',
+    'Δ' => 'δ',
+    'Ε' => 'ε',
+    'Ζ' => 'ζ',
+    'Η' => 'η',
+    'Θ' => 'θ',
+    'Ι' => 'ι',
+    'Κ' => 'κ',
+    'Λ' => 'λ',
+    'Μ' => 'μ',
+    'Ν' => 'ν',
+    'Ξ' => 'ξ',
+    'Ο' => 'ο',
+    'Π' => 'π',
+    'Ρ' => 'ρ',
+    'Σ' => 'σ',
+    'Τ' => 'τ',
+    'Υ' => 'υ',
+    'Φ' => 'φ',
+    'Χ' => 'χ',
+    'Ψ' => 'ψ',
+    'Ω' => 'ω',
+    'Ϊ' => 'ϊ',
+    'Ϋ' => 'ϋ',
+    'Ϙ' => 'ϙ',
+    'Ϛ' => 'ϛ',
+    'Ϝ' => 'ϝ',
+    'Ϟ' => 'ϟ',
+    'Ϡ' => 'ϡ',
+    'Ϣ' => 'ϣ',
+    'Ϥ' => 'ϥ',
+    'Ϧ' => 'ϧ',
+    'Ϩ' => 'ϩ',
+    'Ϫ' => 'ϫ',
+    'Ϭ' => 'ϭ',
+    'Ϯ' => 'ϯ',
+    'Ѐ' => 'ѐ',
+    'Ё' => 'ё',
+    'Ђ' => 'ђ',
+    'Ѓ' => 'ѓ',
+    'Є' => 'є',
+    'Ѕ' => 'ѕ',
+    'І' => 'і',
+    'Ї' => 'ї',
+    'Ј' => 'ј',
+    'Љ' => 'љ',
+    'Њ' => 'њ',
+    'Ћ' => 'ћ',
+    'Ќ' => 'ќ',
+    'Ѝ' => 'ѝ',
+    'Ў' => 'ў',
+    'Џ' => 'џ',
+    'А' => 'а',
+    'Б' => 'б',
+    'В' => 'в',
+    'Г' => 'г',
+    'Д' => 'д',
+    'Е' => 'е',
+    'Ж' => 'ж',
+    'З' => 'з',
+    'И' => 'и',
+    'Й' => 'й',
+    'К' => 'к',
+    'Л' => 'л',
+    'М' => 'м',
+    'Н' => 'н',
+    'О' => 'о',
+    'П' => 'п',
+    'Р' => 'р',
+    'С' => 'с',
+    'Т' => 'т',
+    'У' => 'у',
+    'Ф' => 'ф',
+    'Х' => 'х',
+    'Ц' => 'ц',
+    'Ч' => 'ч',
+    'Ш' => 'ш',
+    'Щ' => 'щ',
+    'Ъ' => 'ъ',
+    'Ы' => 'ы',
+    'Ь' => 'ь',
+    'Э' => 'э',
+    'Ю' => 'ю',
+    'Я' => 'я',
+    'Ѡ' => 'ѡ',
+    'Ѣ' => 'ѣ',
+    'Ѥ' => 'ѥ',
+    'Ѧ' => 'ѧ',
+    'Ѩ' => 'ѩ',
+    'Ѫ' => 'ѫ',
+    'Ѭ' => 'ѭ',
+    'Ѯ' => 'ѯ',
+    'Ѱ' => 'ѱ',
+    'Ѳ' => 'ѳ',
+    'Ѵ' => 'ѵ',
+    'Ѷ' => 'ѷ',
+    'Ѹ' => 'ѹ',
+    'Ѻ' => 'ѻ',
+    'Ѽ' => 'ѽ',
+    'Ѿ' => 'ѿ',
+    'Ҁ' => 'ҁ',
+    'Ҋ' => 'ҋ',
+    'Ҍ' => 'ҍ',
+    'Ҏ' => 'ҏ',
+    'Ґ' => 'ґ',
+    'Ғ' => 'ғ',
+    'Ҕ' => 'ҕ',
+    'Җ' => 'җ',
+    'Ҙ' => 'ҙ',
+    'Қ' => 'қ',
+    'Ҝ' => 'ҝ',
+    'Ҟ' => 'ҟ',
+    'Ҡ' => 'ҡ',
+    'Ң' => 'ң',
+    'Ҥ' => 'ҥ',
+    'Ҧ' => 'ҧ',
+    'Ҩ' => 'ҩ',
+    'Ҫ' => 'ҫ',
+    'Ҭ' => 'ҭ',
+    'Ү' => 'ү',
+    'Ұ' => 'ұ',
+    'Ҳ' => 'ҳ',
+    'Ҵ' => 'ҵ',
+    'Ҷ' => 'ҷ',
+    'Ҹ' => 'ҹ',
+    'Һ' => 'һ',
+    'Ҽ' => 'ҽ',
+    'Ҿ' => 'ҿ',
+    'Ӂ' => 'ӂ',
+    'Ӄ' => 'ӄ',
+    'Ӆ' => 'ӆ',
+    'Ӈ' => 'ӈ',
+    'Ӊ' => 'ӊ',
+    'Ӌ' => 'ӌ',
+    'Ӎ' => 'ӎ',
+    'Ӑ' => 'ӑ',
+    'Ӓ' => 'ӓ',
+    'Ӕ' => 'ӕ',
+    'Ӗ' => 'ӗ',
+    'Ә' => 'ә',
+    'Ӛ' => 'ӛ',
+    'Ӝ' => 'ӝ',
+    'Ӟ' => 'ӟ',
+    'Ӡ' => 'ӡ',
+    'Ӣ' => 'ӣ',
+    'Ӥ' => 'ӥ',
+    'Ӧ' => 'ӧ',
+    'Ө' => 'ө',
+    'Ӫ' => 'ӫ',
+    'Ӭ' => 'ӭ',
+    'Ӯ' => 'ӯ',
+    'Ӱ' => 'ӱ',
+    'Ӳ' => 'ӳ',
+    'Ӵ' => 'ӵ',
+    'Ӹ' => 'ӹ',
+    'Ԁ' => 'ԁ',
+    'Ԃ' => 'ԃ',
+    'Ԅ' => 'ԅ',
+    'Ԇ' => 'ԇ',
+    'Ԉ' => 'ԉ',
+    'Ԋ' => 'ԋ',
+    'Ԍ' => 'ԍ',
+    'Ԏ' => 'ԏ',
+    'Ա' => 'ա',
+    'Բ' => 'բ',
+    'Գ' => 'գ',
+    'Դ' => 'դ',
+    'Ե' => 'ե',
+    'Զ' => 'զ',
+    'Է' => 'է',
+    'Ը' => 'ը',
+    'Թ' => 'թ',
+    'Ժ' => 'ժ',
+    'Ի' => 'ի',
+    'Լ' => 'լ',
+    'Խ' => 'խ',
+    'Ծ' => 'ծ',
+    'Կ' => 'կ',
+    'Հ' => 'հ',
+    'Ձ' => 'ձ',
+    'Ղ' => 'ղ',
+    'Ճ' => 'ճ',
+    'Մ' => 'մ',
+    'Յ' => 'յ',
+    'Ն' => 'ն',
+    'Շ' => 'շ',
+    'Ո' => 'ո',
+    'Չ' => 'չ',
+    'Պ' => 'պ',
+    'Ջ' => 'ջ',
+    'Ռ' => 'ռ',
+    'Ս' => 'ս',
+    'Վ' => 'վ',
+    'Տ' => 'տ',
+    'Ր' => 'ր',
+    'Ց' => 'ց',
+    'Ւ' => 'ւ',
+    'Փ' => 'փ',
+    'Ք' => 'ք',
+    'Օ' => 'օ',
+    'Ֆ' => 'ֆ',
+    'Ḁ' => 'ḁ',
+    'Ḃ' => 'ḃ',
+    'Ḅ' => 'ḅ',
+    'Ḇ' => 'ḇ',
+    'Ḉ' => 'ḉ',
+    'Ḋ' => 'ḋ',
+    'Ḍ' => 'ḍ',
+    'Ḏ' => 'ḏ',
+    'Ḑ' => 'ḑ',
+    'Ḓ' => 'ḓ',
+    'Ḕ' => 'ḕ',
+    'Ḗ' => 'ḗ',
+    'Ḙ' => 'ḙ',
+    'Ḛ' => 'ḛ',
+    'Ḝ' => 'ḝ',
+    'Ḟ' => 'ḟ',
+    'Ḡ' => 'ḡ',
+    'Ḣ' => 'ḣ',
+    'Ḥ' => 'ḥ',
+    'Ḧ' => 'ḧ',
+    'Ḩ' => 'ḩ',
+    'Ḫ' => 'ḫ',
+    'Ḭ' => 'ḭ',
+    'Ḯ' => 'ḯ',
+    'Ḱ' => 'ḱ',
+    'Ḳ' => 'ḳ',
+    'Ḵ' => 'ḵ',
+    'Ḷ' => 'ḷ',
+    'Ḹ' => 'ḹ',
+    'Ḻ' => 'ḻ',
+    'Ḽ' => 'ḽ',
+    'Ḿ' => 'ḿ',
+    'Ṁ' => 'ṁ',
+    'Ṃ' => 'ṃ',
+    'Ṅ' => 'ṅ',
+    'Ṇ' => 'ṇ',
+    'Ṉ' => 'ṉ',
+    'Ṋ' => 'ṋ',
+    'Ṍ' => 'ṍ',
+    'Ṏ' => 'ṏ',
+    'Ṑ' => 'ṑ',
+    'Ṓ' => 'ṓ',
+    'Ṕ' => 'ṕ',
+    'Ṗ' => 'ṗ',
+    'Ṙ' => 'ṙ',
+    'Ṛ' => 'ṛ',
+    'Ṝ' => 'ṝ',
+    'Ṟ' => 'ṟ',
+    'Ṡ' => 'ṡ',
+    'Ṣ' => 'ṣ',
+    'Ṥ' => 'ṥ',
+    'Ṧ' => 'ṧ',
+    'Ṩ' => 'ṩ',
+    'Ṫ' => 'ṫ',
+    'Ṭ' => 'ṭ',
+    'Ṯ' => 'ṯ',
+    'Ṱ' => 'ṱ',
+    'Ṳ' => 'ṳ',
+    'Ṵ' => 'ṵ',
+    'Ṷ' => 'ṷ',
+    'Ṹ' => 'ṹ',
+    'Ṻ' => 'ṻ',
+    'Ṽ' => 'ṽ',
+    'Ṿ' => 'ṿ',
+    'Ẁ' => 'ẁ',
+    'Ẃ' => 'ẃ',
+    'Ẅ' => 'ẅ',
+    'Ẇ' => 'ẇ',
+    'Ẉ' => 'ẉ',
+    'Ẋ' => 'ẋ',
+    'Ẍ' => 'ẍ',
+    'Ẏ' => 'ẏ',
+    'Ẑ' => 'ẑ',
+    'Ẓ' => 'ẓ',
+    'Ẕ' => 'ẕ',
+    'Ạ' => 'ạ',
+    'Ả' => 'ả',
+    'Ấ' => 'ấ',
+    'Ầ' => 'ầ',
+    'Ẩ' => 'ẩ',
+    'Ẫ' => 'ẫ',
+    'Ậ' => 'ậ',
+    'Ắ' => 'ắ',
+    'Ằ' => 'ằ',
+    'Ẳ' => 'ẳ',
+    'Ẵ' => 'ẵ',
+    'Ặ' => 'ặ',
+    'Ẹ' => 'ẹ',
+    'Ẻ' => 'ẻ',
+    'Ẽ' => 'ẽ',
+    'Ế' => 'ế',
+    'Ề' => 'ề',
+    'Ể' => 'ể',
+    'Ễ' => 'ễ',
+    'Ệ' => 'ệ',
+    'Ỉ' => 'ỉ',
+    'Ị' => 'ị',
+    'Ọ' => 'ọ',
+    'Ỏ' => 'ỏ',
+    'Ố' => 'ố',
+    'Ồ' => 'ồ',
+    'Ổ' => 'ổ',
+    'Ỗ' => 'ỗ',
+    'Ộ' => 'ộ',
+    'Ớ' => 'ớ',
+    'Ờ' => 'ờ',
+    'Ở' => 'ở',
+    'Ỡ' => 'ỡ',
+    'Ợ' => 'ợ',
+    'Ụ' => 'ụ',
+    'Ủ' => 'ủ',
+    'Ứ' => 'ứ',
+    'Ừ' => 'ừ',
+    'Ử' => 'ử',
+    'Ữ' => 'ữ',
+    'Ự' => 'ự',
+    'Ỳ' => 'ỳ',
+    'Ỵ' => 'ỵ',
+    'Ỷ' => 'ỷ',
+    'Ỹ' => 'ỹ',
+    'Ἀ' => 'ἀ',
+    'Ἁ' => 'ἁ',
+    'Ἂ' => 'ἂ',
+    'Ἃ' => 'ἃ',
+    'Ἄ' => 'ἄ',
+    'Ἅ' => 'ἅ',
+    'Ἆ' => 'ἆ',
+    'Ἇ' => 'ἇ',
+    'Ἐ' => 'ἐ',
+    'Ἑ' => 'ἑ',
+    'Ἒ' => 'ἒ',
+    'Ἓ' => 'ἓ',
+    'Ἔ' => 'ἔ',
+    'Ἕ' => 'ἕ',
+    'Ἡ' => 'ἡ',
+    'Ἢ' => 'ἢ',
+    'Ἣ' => 'ἣ',
+    'Ἤ' => 'ἤ',
+    'Ἥ' => 'ἥ',
+    'Ἦ' => 'ἦ',
+    'Ἧ' => 'ἧ',
+    'Ἰ' => 'ἰ',
+    'Ἱ' => 'ἱ',
+    'Ἲ' => 'ἲ',
+    'Ἳ' => 'ἳ',
+    'Ἴ' => 'ἴ',
+    'Ἵ' => 'ἵ',
+    'Ἶ' => 'ἶ',
+    'Ἷ' => 'ἷ',
+    'Ὀ' => 'ὀ',
+    'Ὁ' => 'ὁ',
+    'Ὂ' => 'ὂ',
+    'Ὃ' => 'ὃ',
+    'Ὄ' => 'ὄ',
+    'Ὅ' => 'ὅ',
+    'Ὑ' => 'ὑ',
+    'Ὓ' => 'ὓ',
+    'Ὕ' => 'ὕ',
+    'Ὗ' => 'ὗ',
+    'Ὡ' => 'ὡ',
+    'Ὢ' => 'ὢ',
+    'Ὣ' => 'ὣ',
+    'Ὤ' => 'ὤ',
+    'Ὥ' => 'ὥ',
+    'Ὦ' => 'ὦ',
+    'Ὧ' => 'ὧ',
+    'ᾈ' => 'ᾀ',
+    'ᾉ' => 'ᾁ',
+    'ᾊ' => 'ᾂ',
+    'ᾋ' => 'ᾃ',
+    'ᾌ' => 'ᾄ',
+    'ᾍ' => 'ᾅ',
+    'ᾎ' => 'ᾆ',
+    'ᾏ' => 'ᾇ',
+    'ᾘ' => 'ᾐ',
+    'ᾙ' => 'ᾑ',
+    'ᾚ' => 'ᾒ',
+    'ᾛ' => 'ᾓ',
+    'ᾜ' => 'ᾔ',
+    'ᾝ' => 'ᾕ',
+    'ᾞ' => 'ᾖ',
+    'ᾟ' => 'ᾗ',
+    'ᾩ' => 'ᾡ',
+    'ᾪ' => 'ᾢ',
+    'ᾫ' => 'ᾣ',
+    'ᾬ' => 'ᾤ',
+    'ᾭ' => 'ᾥ',
+    'ᾮ' => 'ᾦ',
+    'ᾯ' => 'ᾧ',
+    'Ᾰ' => 'ᾰ',
+    'Ᾱ' => 'ᾱ',
+    'Ὰ' => 'ὰ',
+    'ᾼ' => 'ᾳ',
+    'Ὲ' => 'ὲ',
+    'Ὴ' => 'ὴ',
+    'ῌ' => 'ῃ',
+    'Ῐ' => 'ῐ',
+    'Ῑ' => 'ῑ',
+    'Ὶ' => 'ὶ',
+    'Ῡ' => 'ῡ',
+    'Ὺ' => 'ὺ',
+    'Ῥ' => 'ῥ',
+    'Ὸ' => 'ὸ',
+    'Ὼ' => 'ὼ',
+    'ῼ' => 'ῳ',
+    'Ａ' => 'ａ',
+    'Ｂ' => 'ｂ',
+    'Ｃ' => 'ｃ',
+    'Ｄ' => 'ｄ',
+    'Ｅ' => 'ｅ',
+    'Ｆ' => 'ｆ',
+    'Ｇ' => 'ｇ',
+    'Ｈ' => 'ｈ',
+    'Ｉ' => 'ｉ',
+    'Ｊ' => 'ｊ',
+    'Ｋ' => 'ｋ',
+    'Ｌ' => 'ｌ',
+    'Ｍ' => 'ｍ',
+    'Ｎ' => 'ｎ',
+    'Ｏ' => 'ｏ',
+    'Ｐ' => 'ｐ',
+    'Ｑ' => 'ｑ',
+    'Ｒ' => 'ｒ',
+    'Ｓ' => 'ｓ',
+    'Ｔ' => 'ｔ',
+    'Ｕ' => 'ｕ',
+    'Ｖ' => 'ｖ',
+    'Ｗ' => 'ｗ',
+    'Ｘ' => 'ｘ',
+    'Ｙ' => 'ｙ',
+    'Ｚ' => 'ｚ',
+];
diff --git a/platform/www/inc/Utf8/tables/loweraccents.php b/platform/www/inc/Utf8/tables/loweraccents.php
new file mode 100644
index 0000000..cc3ec8e
--- /dev/null
+++ b/platform/www/inc/Utf8/tables/loweraccents.php
@@ -0,0 +1,116 @@
+<?php
+/**
+ * UTF-8 lookup table for lower case accented letters
+ *
+ * This lookup table defines replacements from the ASCII-7 range for accented
+ * characters. These are lower case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [
+    'á' => 'a',
+    'à' => 'a',
+    'ă' => 'a',
+    'â' => 'a',
+    'å' => 'a',
+    'ä' => 'ae',
+    'ã' => 'a',
+    'ą' => 'a',
+    'ā' => 'a',
+    'æ' => 'ae',
+    'ḃ' => 'b',
+    'ć' => 'c',
+    'ĉ' => 'c',
+    'č' => 'c',
+    'ċ' => 'c',
+    'ç' => 'c',
+    'ď' => 'd',
+    'ḋ' => 'd',
+    'đ' => 'd',
+    'ð' => 'dh',
+    'é' => 'e',
+    'è' => 'e',
+    'ĕ' => 'e',
+    'ê' => 'e',
+    'ě' => 'e',
+    'ë' => 'e',
+    'ė' => 'e',
+    'ę' => 'e',
+    'ē' => 'e',
+    'ḟ' => 'f',
+    'ƒ' => 'f',
+    'ğ' => 'g',
+    'ĝ' => 'g',
+    'ġ' => 'g',
+    'ģ' => 'g',
+    'ĥ' => 'h',
+    'ħ' => 'h',
+    'í' => 'i',
+    'ì' => 'i',
+    'î' => 'i',
+    'ï' => 'i',
+    'ĩ' => 'i',
+    'į' => 'i',
+    'ī' => 'i',
+    'ĵ' => 'j',
+    'ķ' => 'k',
+    'ĺ' => 'l',
+    'ľ' => 'l',
+    'ļ' => 'l',
+    'ł' => 'l',
+    'ṁ' => 'm',
+    'ń' => 'n',
+    'ň' => 'n',
+    'ñ' => 'n',
+    'ņ' => 'n',
+    'ó' => 'o',
+    'ò' => 'o',
+    'ô' => 'o',
+    'ö' => 'oe',
+    'ő' => 'o',
+    'õ' => 'o',
+    'ø' => 'o',
+    'ō' => 'o',
+    'ơ' => 'o',
+    'ṗ' => 'p',
+    'ŕ' => 'r',
+    'ř' => 'r',
+    'ŗ' => 'r',
+    'ś' => 's',
+    'ŝ' => 's',
+    'š' => 's',
+    'ṡ' => 's',
+    'ş' => 's',
+    'ș' => 's',
+    'ß' => 'ss',
+    'ť' => 't',
+    'ṫ' => 't',
+    'ţ' => 't',
+    'ț' => 't',
+    'ŧ' => 't',
+    'ú' => 'u',
+    'ù' => 'u',
+    'ŭ' => 'u',
+    'û' => 'u',
+    'ů' => 'u',
+    'ü' => 'ue',
+    'ű' => 'u',
+    'ũ' => 'u',
+    'ų' => 'u',
+    'ū' => 'u',
+    'ư' => 'u',
+    'ẃ' => 'w',
+    'ẁ' => 'w',
+    'ŵ' => 'w',
+    'ẅ' => 'w',
+    'ý' => 'y',
+    'ỳ' => 'y',
+    'ŷ' => 'y',
+    'ÿ' => 'y',
+    'ź' => 'z',
+    'ž' => 'z',
+    'ż' => 'z',
+    'þ' => 'th',
+    'µ' => 'u',
+];
diff --git a/platform/www/inc/Utf8/tables/romanization.php b/platform/www/inc/Utf8/tables/romanization.php
new file mode 100644
index 0000000..e757b9c
--- /dev/null
+++ b/platform/www/inc/Utf8/tables/romanization.php
@@ -0,0 +1,1458 @@
+<?php
+/**
+ * Romanization lookup table
+ *
+ * This lookup table provides a way to transform strings written in a language
+ * different from the ones based upon Latin letters into plain ASCII.
+ *
+ * Please note: this is not a scientific transliteration table. It only works
+ * one way, from non-Latin to ASCII, and it works by simple character replacement
+ * only. Specialities of each language are not supported.
+ *
+ * @todo some keys are used multiple times
+ * @todo remove or integrate commented pairs
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @author Vitaly Blokhin <vitinfo@vitn.com>
+ * @author Bisqwit <bisqwit@iki.fi>
+ * @author Arthit Suriyawongkul <arthit@gmail.com>
+ * @author Denis Scheither <amorphis@uni-bremen.de>
+ * @author Eivind Morland <eivind.morland@gmail.com>
+ * @link   http://www.uconv.com/translit.htm
+ * @link   http://kanjidict.stc.cx/hiragana.php?src=2
+ * @link   http://www.translatum.gr/converter/greek-transliteration.htm
+ * @link   http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
+ * @link   http://www.btranslations.com/resources/romanization/korean.asp
+ */
+return [
+    // scandinavian - differs from what we do in deaccent
+    'å' => 'a',
+    'Å' => 'A',
+    'ä' => 'a',
+    'Ä' => 'A',
+    'ö' => 'o',
+    'Ö' => 'O',
+
+    //russian cyrillic
+    'а' => 'a',
+    'А' => 'A',
+    'б' => 'b',
+    'Б' => 'B',
+    'в' => 'v',
+    'В' => 'V',
+    'г' => 'g',
+    'Г' => 'G',
+    'д' => 'd',
+    'Д' => 'D',
+    'е' => 'e',
+    'Е' => 'E',
+    'ё' => 'jo',
+    'Ё' => 'Jo',
+    'ж' => 'zh',
+    'Ж' => 'Zh',
+    'з' => 'z',
+    'З' => 'Z',
+    'и' => 'i',
+    'И' => 'I',
+    'й' => 'j',
+    'Й' => 'J',
+    'к' => 'k',
+    'К' => 'K',
+    'л' => 'l',
+    'Л' => 'L',
+    'м' => 'm',
+    'М' => 'M',
+    'н' => 'n',
+    'Н' => 'N',
+    'о' => 'o',
+    'О' => 'O',
+    'п' => 'p',
+    'П' => 'P',
+    'р' => 'r',
+    'Р' => 'R',
+    'с' => 's',
+    'С' => 'S',
+    'т' => 't',
+    'Т' => 'T',
+    'у' => 'u',
+    'У' => 'U',
+    'ф' => 'f',
+    'Ф' => 'F',
+    'х' => 'x',
+    'Х' => 'X',
+    'ц' => 'c',
+    'Ц' => 'C',
+    'ч' => 'ch',
+    'Ч' => 'Ch',
+    'ш' => 'sh',
+    'Ш' => 'Sh',
+    'щ' => 'sch',
+    'Щ' => 'Sch',
+    'ъ' => '',
+    'Ъ' => '',
+    'ы' => 'y',
+    'Ы' => 'Y',
+    'ь' => '',
+    'Ь' => '',
+    'э' => 'eh',
+    'Э' => 'Eh',
+    'ю' => 'ju',
+    'Ю' => 'Ju',
+    'я' => 'ja',
+    'Я' => 'Ja',
+
+    // Ukrainian cyrillic
+    'Ґ' => 'Gh',
+    'ґ' => 'gh',
+    'Є' => 'Je',
+    'є' => 'je',
+    'І' => 'I',
+    'і' => 'i',
+    'Ї' => 'Ji',
+    'ї' => 'ji',
+
+    // Georgian
+    'ა' => 'a',
+    'ბ' => 'b',
+    'გ' => 'g',
+    'დ' => 'd',
+    'ე' => 'e',
+    'ვ' => 'v',
+    'ზ' => 'z',
+    'თ' => 'th',
+    'ი' => 'i',
+    'კ' => 'p',
+    'ლ' => 'l',
+    'მ' => 'm',
+    'ნ' => 'n',
+    'ო' => 'o',
+    'პ' => 'p',
+    'ჟ' => 'zh',
+    'რ' => 'r',
+    'ს' => 's',
+    'ტ' => 't',
+    'უ' => 'u',
+    'ფ' => 'ph',
+    'ქ' => 'kh',
+    'ღ' => 'gh',
+    'ყ' => 'q',
+    'შ' => 'sh',
+    'ჩ' => 'ch',
+    'ც' => 'c',
+    'ძ' => 'dh',
+    'წ' => 'w',
+    'ჭ' => 'j',
+    'ხ' => 'x',
+    'ჯ' => 'jh',
+    'ჰ' => 'xh',
+
+    //Sanskrit
+    'अ' => 'a',
+    'आ' => 'ah',
+    'इ' => 'i',
+    'ई' => 'ih',
+    'उ' => 'u',
+    'ऊ' => 'uh',
+    'ऋ' => 'ry',
+    'ॠ' => 'ryh',
+    'ऌ' => 'ly',
+    'ॡ' => 'lyh',
+    'ए' => 'e',
+    'ऐ' => 'ay',
+    'ओ' => 'o',
+    'औ' => 'aw',
+    'अं' => 'amh',
+    'अः' => 'aq',
+    'क' => 'k',
+    'ख' => 'kh',
+    'ग' => 'g',
+    'घ' => 'gh',
+    'ङ' => 'nh',
+    'च' => 'c',
+    'छ' => 'ch',
+    'ज' => 'j',
+    'झ' => 'jh',
+    'ञ' => 'ny',
+    'ट' => 'tq',
+    'ठ' => 'tqh',
+    'ड' => 'dq',
+    'ढ' => 'dqh',
+    'ण' => 'nq',
+    'त' => 't',
+    'थ' => 'th',
+    'द' => 'd',
+    'ध' => 'dh',
+    'न' => 'n',
+    'प' => 'p',
+    'फ' => 'ph',
+    'ब' => 'b',
+    'भ' => 'bh',
+    'म' => 'm',
+    'य' => 'z',
+    'र' => 'r',
+    'ल' => 'l',
+    'व' => 'v',
+    'श' => 'sh',
+    'ष' => 'sqh',
+    'स' => 's',
+    'ह' => 'x',
+
+    //Sanskrit diacritics
+    'Ā' => 'A',
+    'Ī' => 'I',
+    'Ū' => 'U',
+    'Ṛ' => 'R',
+    'Ṝ' => 'R',
+    'Ṅ' => 'N',
+    'Ñ' => 'N',
+    'Ṭ' => 'T',
+    'Ḍ' => 'D',
+    'Ṇ' => 'N',
+    'Ś' => 'S',
+    'Ṣ' => 'S',
+    'Ṁ' => 'M',
+    'Ṃ' => 'M',
+    'Ḥ' => 'H',
+    'Ḷ' => 'L',
+    'Ḹ' => 'L',
+    'ā' => 'a',
+    'ī' => 'i',
+    'ū' => 'u',
+    'ṛ' => 'r',
+    'ṝ' => 'r',
+    'ṅ' => 'n',
+    'ñ' => 'n',
+    'ṭ' => 't',
+    'ḍ' => 'd',
+    'ṇ' => 'n',
+    'ś' => 's',
+    'ṣ' => 's',
+    'ṁ' => 'm',
+    'ṃ' => 'm',
+    'ḥ' => 'h',
+    'ḷ' => 'l',
+    'ḹ' => 'l',
+
+    //Hebrew
+    'א' => 'a',
+    'ב' => 'b',
+    'ג' => 'g',
+    'ד' => 'd',
+    'ה' => 'h',
+    'ו' => 'v',
+    'ז' => 'z',
+    'ח' => 'kh',
+    'ט' => 'th',
+    'י' => 'y',
+    'ך' => 'h',
+    'כ' => 'k',
+    'ל' => 'l',
+    'ם' => 'm',
+    'מ' => 'm',
+    'ן' => 'n',
+    'נ' => 'n',
+    'ס' => 's',
+    'ע' => 'ah',
+    'ף' => 'f',
+    'פ' => 'p',
+    'ץ' => 'c',
+    'צ' => 'c',
+    'ק' => 'q',
+    'ר' => 'r',
+    'ש' => 'sh',
+    'ת' => 't',
+
+    //Arabic
+    'ا' => 'a',
+    'ب' => 'b',
+    'ت' => 't',
+    'ث' => 'th',
+    'ج' => 'g',
+    'ح' => 'xh',
+    'خ' => 'x',
+    'د' => 'd',
+    'ذ' => 'dh',
+    'ر' => 'r',
+    'ز' => 'z',
+    'س' => 's',
+    'ش' => 'sh',
+    'ص' => 's\'',
+    'ض' => 'd\'',
+    'ط' => 't\'',
+    'ظ' => 'z\'',
+    'ع' => 'y',
+    'غ' => 'gh',
+    'ف' => 'f',
+    'ق' => 'q',
+    'ك' => 'k',
+    'ل' => 'l',
+    'م' => 'm',
+    'ن' => 'n',
+    'ه' => 'x\'',
+    'و' => 'u',
+    'ي' => 'i',
+
+    // Japanese characters  (last update: 2008-05-09)
+
+    // Japanese hiragana
+
+    // 3 character syllables, っ doubles the consonant after
+    'っちゃ' => 'ccha',
+    'っちぇ' => 'cche',
+    'っちょ' => 'ccho',
+    'っちゅ' => 'cchu',
+    'っびゃ' => 'bbya',
+    'っびぇ' => 'bbye',
+    'っびぃ' => 'bbyi',
+    'っびょ' => 'bbyo',
+    'っびゅ' => 'bbyu',
+    'っぴゃ' => 'ppya',
+    'っぴぇ' => 'ppye',
+    'っぴぃ' => 'ppyi',
+    'っぴょ' => 'ppyo',
+    'っぴゅ' => 'ppyu',
+    'っちゃ' => 'ccha',
+    'っちぇ' => 'cche',
+    'っち' => 'cchi',
+    'っちょ' => 'ccho',
+    'っちゅ' => 'cchu',
+    // 'っひゃ'=>'hya',
+    // 'っひぇ'=>'hye',
+    // 'っひぃ'=>'hyi',
+    // 'っひょ'=>'hyo',
+    // 'っひゅ'=>'hyu',
+    'っきゃ' => 'kkya',
+    'っきぇ' => 'kkye',
+    'っきぃ' => 'kkyi',
+    'っきょ' => 'kkyo',
+    'っきゅ' => 'kkyu',
+    'っぎゃ' => 'ggya',
+    'っぎぇ' => 'ggye',
+    'っぎぃ' => 'ggyi',
+    'っぎょ' => 'ggyo',
+    'っぎゅ' => 'ggyu',
+    'っみゃ' => 'mmya',
+    'っみぇ' => 'mmye',
+    'っみぃ' => 'mmyi',
+    'っみょ' => 'mmyo',
+    'っみゅ' => 'mmyu',
+    'っにゃ' => 'nnya',
+    'っにぇ' => 'nnye',
+    'っにぃ' => 'nnyi',
+    'っにょ' => 'nnyo',
+    'っにゅ' => 'nnyu',
+    'っりゃ' => 'rrya',
+    'っりぇ' => 'rrye',
+    'っりぃ' => 'rryi',
+    'っりょ' => 'rryo',
+    'っりゅ' => 'rryu',
+    'っしゃ' => 'ssha',
+    'っしぇ' => 'sshe',
+    'っし' => 'sshi',
+    'っしょ' => 'ssho',
+    'っしゅ' => 'sshu',
+
+    // separate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the
+    // apostrophe would be converted to _ anyway)
+    'んあ' => 'n_a',
+    'んえ' => 'n_e',
+    'んい' => 'n_i',
+    'んお' => 'n_o',
+    'んう' => 'n_u',
+    'んや' => 'n_ya',
+    'んよ' => 'n_yo',
+    'んゆ' => 'n_yu',
+
+    // 2 character syllables - normal
+    'ふぁ' => 'fa',
+    'ふぇ' => 'fe',
+    'ふぃ' => 'fi',
+    'ふぉ' => 'fo',
+    'ちゃ' => 'cha',
+    'ちぇ' => 'che',
+    'ち' => 'chi',
+    'ちょ' => 'cho',
+    'ちゅ' => 'chu',
+    'ひゃ' => 'hya',
+    'ひぇ' => 'hye',
+    'ひぃ' => 'hyi',
+    'ひょ' => 'hyo',
+    'ひゅ' => 'hyu',
+    'びゃ' => 'bya',
+    'びぇ' => 'bye',
+    'びぃ' => 'byi',
+    'びょ' => 'byo',
+    'びゅ' => 'byu',
+    'ぴゃ' => 'pya',
+    'ぴぇ' => 'pye',
+    'ぴぃ' => 'pyi',
+    'ぴょ' => 'pyo',
+    'ぴゅ' => 'pyu',
+    'きゃ' => 'kya',
+    'きぇ' => 'kye',
+    'きぃ' => 'kyi',
+    'きょ' => 'kyo',
+    'きゅ' => 'kyu',
+    'ぎゃ' => 'gya',
+    'ぎぇ' => 'gye',
+    'ぎぃ' => 'gyi',
+    'ぎょ' => 'gyo',
+    'ぎゅ' => 'gyu',
+    'みゃ' => 'mya',
+    'みぇ' => 'mye',
+    'みぃ' => 'myi',
+    'みょ' => 'myo',
+    'みゅ' => 'myu',
+    'にゃ' => 'nya',
+    'にぇ' => 'nye',
+    'にぃ' => 'nyi',
+    'にょ' => 'nyo',
+    'にゅ' => 'nyu',
+    'りゃ' => 'rya',
+    'りぇ' => 'rye',
+    'りぃ' => 'ryi',
+    'りょ' => 'ryo',
+    'りゅ' => 'ryu',
+    'しゃ' => 'sha',
+    'しぇ' => 'she',
+    'し' => 'shi',
+    'しょ' => 'sho',
+    'しゅ' => 'shu',
+    'じゃ' => 'ja',
+    'じぇ' => 'je',
+    'じょ' => 'jo',
+    'じゅ' => 'ju',
+    'うぇ' => 'we',
+    'うぃ' => 'wi',
+    'いぇ' => 'ye',
+
+    // 2 character syllables, っ doubles the consonant after
+    'っば' => 'bba',
+    'っべ' => 'bbe',
+    'っび' => 'bbi',
+    'っぼ' => 'bbo',
+    'っぶ' => 'bbu',
+    'っぱ' => 'ppa',
+    'っぺ' => 'ppe',
+    'っぴ' => 'ppi',
+    'っぽ' => 'ppo',
+    'っぷ' => 'ppu',
+    'った' => 'tta',
+    'って' => 'tte',
+    'っち' => 'cchi',
+    'っと' => 'tto',
+    'っつ' => 'ttsu',
+    'っだ' => 'dda',
+    'っで' => 'dde',
+    'っぢ' => 'ddi',
+    'っど' => 'ddo',
+    'っづ' => 'ddu',
+    'っが' => 'gga',
+    'っげ' => 'gge',
+    'っぎ' => 'ggi',
+    'っご' => 'ggo',
+    'っぐ' => 'ggu',
+    'っか' => 'kka',
+    'っけ' => 'kke',
+    'っき' => 'kki',
+    'っこ' => 'kko',
+    'っく' => 'kku',
+    'っま' => 'mma',
+    'っめ' => 'mme',
+    'っみ' => 'mmi',
+    'っも' => 'mmo',
+    'っむ' => 'mmu',
+    'っな' => 'nna',
+    'っね' => 'nne',
+    'っに' => 'nni',
+    'っの' => 'nno',
+    'っぬ' => 'nnu',
+    'っら' => 'rra',
+    'っれ' => 'rre',
+    'っり' => 'rri',
+    'っろ' => 'rro',
+    'っる' => 'rru',
+    'っさ' => 'ssa',
+    'っせ' => 'sse',
+    'っし' => 'sshi',
+    'っそ' => 'sso',
+    'っす' => 'ssu',
+    'っざ' => 'zza',
+    'っぜ' => 'zze',
+    'っじ' => 'jji',
+    'っぞ' => 'zzo',
+    'っず' => 'zzu',
+
+    // 1 character syllables
+    'あ' => 'a',
+    'え' => 'e',
+    'い' => 'i',
+    'お' => 'o',
+    'う' => 'u',
+    'ん' => 'n',
+    'は' => 'ha',
+    'へ' => 'he',
+    'ひ' => 'hi',
+    'ほ' => 'ho',
+    'ふ' => 'fu',
+    'ば' => 'ba',
+    'べ' => 'be',
+    'び' => 'bi',
+    'ぼ' => 'bo',
+    'ぶ' => 'bu',
+    'ぱ' => 'pa',
+    'ぺ' => 'pe',
+    'ぴ' => 'pi',
+    'ぽ' => 'po',
+    'ぷ' => 'pu',
+    'た' => 'ta',
+    'て' => 'te',
+    'ち' => 'chi',
+    'と' => 'to',
+    'つ' => 'tsu',
+    'だ' => 'da',
+    'で' => 'de',
+    'ぢ' => 'di',
+    'ど' => 'do',
+    'づ' => 'du',
+    'が' => 'ga',
+    'げ' => 'ge',
+    'ぎ' => 'gi',
+    'ご' => 'go',
+    'ぐ' => 'gu',
+    'か' => 'ka',
+    'け' => 'ke',
+    'き' => 'ki',
+    'こ' => 'ko',
+    'く' => 'ku',
+    'ま' => 'ma',
+    'め' => 'me',
+    'み' => 'mi',
+    'も' => 'mo',
+    'む' => 'mu',
+    'な' => 'na',
+    'ね' => 'ne',
+    'に' => 'ni',
+    'の' => 'no',
+    'ぬ' => 'nu',
+    'ら' => 'ra',
+    'れ' => 're',
+    'り' => 'ri',
+    'ろ' => 'ro',
+    'る' => 'ru',
+    'さ' => 'sa',
+    'せ' => 'se',
+    'し' => 'shi',
+    'そ' => 'so',
+    'す' => 'su',
+    'わ' => 'wa',
+    'を' => 'wo',
+    'ざ' => 'za',
+    'ぜ' => 'ze',
+    'じ' => 'ji',
+    'ぞ' => 'zo',
+    'ず' => 'zu',
+    'や' => 'ya',
+    'よ' => 'yo',
+    'ゆ' => 'yu',
+    // old characters
+    'ゑ' => 'we',
+    'ゐ' => 'wi',
+
+    //  convert what's left (probably only kicks in when something's missing above)
+    // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u',
+    // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu',
+
+    // never seen one of those (disabled for the moment)
+    // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu',
+    // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu',
+    // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu',
+    // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu',
+    // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu',
+    // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu',
+    // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu',
+    // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu',
+    // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu',
+    // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu',
+    // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu',
+    // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu',
+    // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu',
+    // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu',
+
+    // 'spare' characters from other romanization systems
+    // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du',
+    // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu',
+    // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su',
+    // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu',
+    //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu',
+    //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu',
+    //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu',
+    //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu',
+    //'し'=>'ci','い'=>'yi','ぢ'=>'dzi',
+    //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju',
+
+
+    // Japanese katakana
+
+    // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before
+    // (usually written with macron, but we don't want that in our URLs)
+    'ッビャー' => 'bbyaa',
+    'ッビェー' => 'bbyee',
+    'ッビィー' => 'bbyii',
+    'ッビョー' => 'bbyoo',
+    'ッビュー' => 'bbyuu',
+    'ッピャー' => 'ppyaa',
+    'ッピェー' => 'ppyee',
+    'ッピィー' => 'ppyii',
+    'ッピョー' => 'ppyoo',
+    'ッピュー' => 'ppyuu',
+    'ッキャー' => 'kkyaa',
+    'ッキェー' => 'kkyee',
+    'ッキィー' => 'kkyii',
+    'ッキョー' => 'kkyoo',
+    'ッキュー' => 'kkyuu',
+    'ッギャー' => 'ggyaa',
+    'ッギェー' => 'ggyee',
+    'ッギィー' => 'ggyii',
+    'ッギョー' => 'ggyoo',
+    'ッギュー' => 'ggyuu',
+    'ッミャー' => 'mmyaa',
+    'ッミェー' => 'mmyee',
+    'ッミィー' => 'mmyii',
+    'ッミョー' => 'mmyoo',
+    'ッミュー' => 'mmyuu',
+    'ッニャー' => 'nnyaa',
+    'ッニェー' => 'nnyee',
+    'ッニィー' => 'nnyii',
+    'ッニョー' => 'nnyoo',
+    'ッニュー' => 'nnyuu',
+    'ッリャー' => 'rryaa',
+    'ッリェー' => 'rryee',
+    'ッリィー' => 'rryii',
+    'ッリョー' => 'rryoo',
+    'ッリュー' => 'rryuu',
+    'ッシャー' => 'sshaa',
+    'ッシェー' => 'sshee',
+    'ッシー' => 'sshii',
+    'ッショー' => 'sshoo',
+    'ッシュー' => 'sshuu',
+    'ッチャー' => 'cchaa',
+    'ッチェー' => 'cchee',
+    'ッチー' => 'cchii',
+    'ッチョー' => 'cchoo',
+    'ッチュー' => 'cchuu',
+    'ッティー' => 'ttii',
+    'ッヂィー' => 'ddii',
+
+    // 3 character syllables - doubled vowels
+    'ファー' => 'faa',
+    'フェー' => 'fee',
+    'フィー' => 'fii',
+    'フォー' => 'foo',
+    'フャー' => 'fyaa',
+    'フェー' => 'fyee',
+    'フィー' => 'fyii',
+    'フョー' => 'fyoo',
+    'フュー' => 'fyuu',
+    'ヒャー' => 'hyaa',
+    'ヒェー' => 'hyee',
+    'ヒィー' => 'hyii',
+    'ヒョー' => 'hyoo',
+    'ヒュー' => 'hyuu',
+    'ビャー' => 'byaa',
+    'ビェー' => 'byee',
+    'ビィー' => 'byii',
+    'ビョー' => 'byoo',
+    'ビュー' => 'byuu',
+    'ピャー' => 'pyaa',
+    'ピェー' => 'pyee',
+    'ピィー' => 'pyii',
+    'ピョー' => 'pyoo',
+    'ピュー' => 'pyuu',
+    'キャー' => 'kyaa',
+    'キェー' => 'kyee',
+    'キィー' => 'kyii',
+    'キョー' => 'kyoo',
+    'キュー' => 'kyuu',
+    'ギャー' => 'gyaa',
+    'ギェー' => 'gyee',
+    'ギィー' => 'gyii',
+    'ギョー' => 'gyoo',
+    'ギュー' => 'gyuu',
+    'ミャー' => 'myaa',
+    'ミェー' => 'myee',
+    'ミィー' => 'myii',
+    'ミョー' => 'myoo',
+    'ミュー' => 'myuu',
+    'ニャー' => 'nyaa',
+    'ニェー' => 'nyee',
+    'ニィー' => 'nyii',
+    'ニョー' => 'nyoo',
+    'ニュー' => 'nyuu',
+    'リャー' => 'ryaa',
+    'リェー' => 'ryee',
+    'リィー' => 'ryii',
+    'リョー' => 'ryoo',
+    'リュー' => 'ryuu',
+    'シャー' => 'shaa',
+    'シェー' => 'shee',
+    'シー' => 'shii',
+    'ショー' => 'shoo',
+    'シュー' => 'shuu',
+    'ジャー' => 'jaa',
+    'ジェー' => 'jee',
+    'ジー' => 'jii',
+    'ジョー' => 'joo',
+    'ジュー' => 'juu',
+    'スァー' => 'swaa',
+    'スェー' => 'swee',
+    'スィー' => 'swii',
+    'スォー' => 'swoo',
+    'スゥー' => 'swuu',
+    'デァー' => 'daa',
+    'デェー' => 'dee',
+    'ディー' => 'dii',
+    'デォー' => 'doo',
+    'デゥー' => 'duu',
+    'チャー' => 'chaa',
+    'チェー' => 'chee',
+    'チー' => 'chii',
+    'チョー' => 'choo',
+    'チュー' => 'chuu',
+    'ヂャー' => 'dyaa',
+    'ヂェー' => 'dyee',
+    'ヂィー' => 'dyii',
+    'ヂョー' => 'dyoo',
+    'ヂュー' => 'dyuu',
+    'ツャー' => 'tsaa',
+    'ツェー' => 'tsee',
+    'ツィー' => 'tsii',
+    'ツョー' => 'tsoo',
+    'ツー' => 'tsuu',
+    'トァー' => 'twaa',
+    'トェー' => 'twee',
+    'トィー' => 'twii',
+    'トォー' => 'twoo',
+    'トゥー' => 'twuu',
+    'ドァー' => 'dwaa',
+    'ドェー' => 'dwee',
+    'ドィー' => 'dwii',
+    'ドォー' => 'dwoo',
+    'ドゥー' => 'dwuu',
+    'ウァー' => 'whaa',
+    'ウェー' => 'whee',
+    'ウィー' => 'whii',
+    'ウォー' => 'whoo',
+    'ウゥー' => 'whuu',
+    'ヴャー' => 'vyaa',
+    'ヴェー' => 'vyee',
+    'ヴィー' => 'vyii',
+    'ヴョー' => 'vyoo',
+    'ヴュー' => 'vyuu',
+    'ヴァー' => 'vaa',
+    'ヴェー' => 'vee',
+    'ヴィー' => 'vii',
+    'ヴォー' => 'voo',
+    'ヴー' => 'vuu',
+    'ウェー' => 'wee',
+    'ウィー' => 'wii',
+    'イェー' => 'yee',
+    'ティー' => 'tii',
+    'ヂィー' => 'dii',
+
+    // 3 character syllables - doubled consonants
+    'ッビャ' => 'bbya',
+    'ッビェ' => 'bbye',
+    'ッビィ' => 'bbyi',
+    'ッビョ' => 'bbyo',
+    'ッビュ' => 'bbyu',
+    'ッピャ' => 'ppya',
+    'ッピェ' => 'ppye',
+    'ッピィ' => 'ppyi',
+    'ッピョ' => 'ppyo',
+    'ッピュ' => 'ppyu',
+    'ッキャ' => 'kkya',
+    'ッキェ' => 'kkye',
+    'ッキィ' => 'kkyi',
+    'ッキョ' => 'kkyo',
+    'ッキュ' => 'kkyu',
+    'ッギャ' => 'ggya',
+    'ッギェ' => 'ggye',
+    'ッギィ' => 'ggyi',
+    'ッギョ' => 'ggyo',
+    'ッギュ' => 'ggyu',
+    'ッミャ' => 'mmya',
+    'ッミェ' => 'mmye',
+    'ッミィ' => 'mmyi',
+    'ッミョ' => 'mmyo',
+    'ッミュ' => 'mmyu',
+    'ッニャ' => 'nnya',
+    'ッニェ' => 'nnye',
+    'ッニィ' => 'nnyi',
+    'ッニョ' => 'nnyo',
+    'ッニュ' => 'nnyu',
+    'ッリャ' => 'rrya',
+    'ッリェ' => 'rrye',
+    'ッリィ' => 'rryi',
+    'ッリョ' => 'rryo',
+    'ッリュ' => 'rryu',
+    'ッシャ' => 'ssha',
+    'ッシェ' => 'sshe',
+    'ッシ' => 'sshi',
+    'ッショ' => 'ssho',
+    'ッシュ' => 'sshu',
+    'ッチャ' => 'ccha',
+    'ッチェ' => 'cche',
+    'ッチ' => 'cchi',
+    'ッチョ' => 'ccho',
+    'ッチュ' => 'cchu',
+    'ッティ' => 'tti',
+    'ッヂィ' => 'ddi',
+
+    // 3 character syllables - doubled vowel and consonants
+    'ッバー' => 'bbaa',
+    'ッベー' => 'bbee',
+    'ッビー' => 'bbii',
+    'ッボー' => 'bboo',
+    'ッブー' => 'bbuu',
+    'ッパー' => 'ppaa',
+    'ッペー' => 'ppee',
+    'ッピー' => 'ppii',
+    'ッポー' => 'ppoo',
+    'ップー' => 'ppuu',
+    'ッケー' => 'kkee',
+    'ッキー' => 'kkii',
+    'ッコー' => 'kkoo',
+    'ックー' => 'kkuu',
+    'ッカー' => 'kkaa',
+    'ッガー' => 'ggaa',
+    'ッゲー' => 'ggee',
+    'ッギー' => 'ggii',
+    'ッゴー' => 'ggoo',
+    'ッグー' => 'gguu',
+    'ッマー' => 'maa',
+    'ッメー' => 'mee',
+    'ッミー' => 'mii',
+    'ッモー' => 'moo',
+    'ッムー' => 'muu',
+    'ッナー' => 'nnaa',
+    'ッネー' => 'nnee',
+    'ッニー' => 'nnii',
+    'ッノー' => 'nnoo',
+    'ッヌー' => 'nnuu',
+    'ッラー' => 'rraa',
+    'ッレー' => 'rree',
+    'ッリー' => 'rrii',
+    'ッロー' => 'rroo',
+    'ッルー' => 'rruu',
+    'ッサー' => 'ssaa',
+    'ッセー' => 'ssee',
+    'ッシー' => 'sshii',
+    'ッソー' => 'ssoo',
+    'ッスー' => 'ssuu',
+    'ッザー' => 'zzaa',
+    'ッゼー' => 'zzee',
+    'ッジー' => 'jjii',
+    'ッゾー' => 'zzoo',
+    'ッズー' => 'zzuu',
+    'ッター' => 'ttaa',
+    'ッテー' => 'ttee',
+    'ッチー' => 'chii',
+    'ットー' => 'ttoo',
+    'ッツー' => 'ttsuu',
+    'ッダー' => 'ddaa',
+    'ッデー' => 'ddee',
+    'ッヂー' => 'ddii',
+    'ッドー' => 'ddoo',
+    'ッヅー' => 'dduu',
+
+    // 2 character syllables - normal
+    'ファ' => 'fa',
+    'フェ' => 'fe',
+    'フィ' => 'fi',
+    'フォ' => 'fo',
+    'フゥ' => 'fu',
+    // 'フャ'=>'fya',
+    // 'フェ'=>'fye',
+    // 'フィ'=>'fyi',
+    // 'フョ'=>'fyo',
+    // 'フュ'=>'fyu',
+    'フャ' => 'fa',
+    'フェ' => 'fe',
+    'フィ' => 'fi',
+    'フョ' => 'fo',
+    'フュ' => 'fu',
+    'ヒャ' => 'hya',
+    'ヒェ' => 'hye',
+    'ヒィ' => 'hyi',
+    'ヒョ' => 'hyo',
+    'ヒュ' => 'hyu',
+    'ビャ' => 'bya',
+    'ビェ' => 'bye',
+    'ビィ' => 'byi',
+    'ビョ' => 'byo',
+    'ビュ' => 'byu',
+    'ピャ' => 'pya',
+    'ピェ' => 'pye',
+    'ピィ' => 'pyi',
+    'ピョ' => 'pyo',
+    'ピュ' => 'pyu',
+    'キャ' => 'kya',
+    'キェ' => 'kye',
+    'キィ' => 'kyi',
+    'キョ' => 'kyo',
+    'キュ' => 'kyu',
+    'ギャ' => 'gya',
+    'ギェ' => 'gye',
+    'ギィ' => 'gyi',
+    'ギョ' => 'gyo',
+    'ギュ' => 'gyu',
+    'ミャ' => 'mya',
+    'ミェ' => 'mye',
+    'ミィ' => 'myi',
+    'ミョ' => 'myo',
+    'ミュ' => 'myu',
+    'ニャ' => 'nya',
+    'ニェ' => 'nye',
+    'ニィ' => 'nyi',
+    'ニョ' => 'nyo',
+    'ニュ' => 'nyu',
+    'リャ' => 'rya',
+    'リェ' => 'rye',
+    'リィ' => 'ryi',
+    'リョ' => 'ryo',
+    'リュ' => 'ryu',
+    'シャ' => 'sha',
+    'シェ' => 'she',
+    'ショ' => 'sho',
+    'シュ' => 'shu',
+    'ジャ' => 'ja',
+    'ジェ' => 'je',
+    'ジョ' => 'jo',
+    'ジュ' => 'ju',
+    'スァ' => 'swa',
+    'スェ' => 'swe',
+    'スィ' => 'swi',
+    'スォ' => 'swo',
+    'スゥ' => 'swu',
+    'デァ' => 'da',
+    'デェ' => 'de',
+    'ディ' => 'di',
+    'デォ' => 'do',
+    'デゥ' => 'du',
+    'チャ' => 'cha',
+    'チェ' => 'che',
+    'チ' => 'chi',
+    'チョ' => 'cho',
+    'チュ' => 'chu',
+    // 'ヂャ'=>'dya',
+    // 'ヂェ'=>'dye',
+    // 'ヂィ'=>'dyi',
+    // 'ヂョ'=>'dyo',
+    // 'ヂュ'=>'dyu',
+    'ツャ' => 'tsa',
+    'ツェ' => 'tse',
+    'ツィ' => 'tsi',
+    'ツョ' => 'tso',
+    'ツ' => 'tsu',
+    'トァ' => 'twa',
+    'トェ' => 'twe',
+    'トィ' => 'twi',
+    'トォ' => 'two',
+    'トゥ' => 'twu',
+    'ドァ' => 'dwa',
+    'ドェ' => 'dwe',
+    'ドィ' => 'dwi',
+    'ドォ' => 'dwo',
+    'ドゥ' => 'dwu',
+    'ウァ' => 'wha',
+    'ウェ' => 'whe',
+    'ウィ' => 'whi',
+    'ウォ' => 'who',
+    'ウゥ' => 'whu',
+    'ヴャ' => 'vya',
+    'ヴェ' => 'vye',
+    'ヴィ' => 'vyi',
+    'ヴョ' => 'vyo',
+    'ヴュ' => 'vyu',
+    'ヴァ' => 'va',
+    'ヴェ' => 've',
+    'ヴィ' => 'vi',
+    'ヴォ' => 'vo',
+    'ヴ' => 'vu',
+    'ウェ' => 'we',
+    'ウィ' => 'wi',
+    'イェ' => 'ye',
+    'ティ' => 'ti',
+    'ヂィ' => 'di',
+
+    // 2 character syllables - doubled vowel
+    'アー' => 'aa',
+    'エー' => 'ee',
+    'イー' => 'ii',
+    'オー' => 'oo',
+    'ウー' => 'uu',
+    'ダー' => 'daa',
+    'デー' => 'dee',
+    'ヂー' => 'dii',
+    'ドー' => 'doo',
+    'ヅー' => 'duu',
+    'ハー' => 'haa',
+    'ヘー' => 'hee',
+    'ヒー' => 'hii',
+    'ホー' => 'hoo',
+    'フー' => 'fuu',
+    'バー' => 'baa',
+    'ベー' => 'bee',
+    'ビー' => 'bii',
+    'ボー' => 'boo',
+    'ブー' => 'buu',
+    'パー' => 'paa',
+    'ペー' => 'pee',
+    'ピー' => 'pii',
+    'ポー' => 'poo',
+    'プー' => 'puu',
+    'ケー' => 'kee',
+    'キー' => 'kii',
+    'コー' => 'koo',
+    'クー' => 'kuu',
+    'カー' => 'kaa',
+    'ガー' => 'gaa',
+    'ゲー' => 'gee',
+    'ギー' => 'gii',
+    'ゴー' => 'goo',
+    'グー' => 'guu',
+    'マー' => 'maa',
+    'メー' => 'mee',
+    'ミー' => 'mii',
+    'モー' => 'moo',
+    'ムー' => 'muu',
+    'ナー' => 'naa',
+    'ネー' => 'nee',
+    'ニー' => 'nii',
+    'ノー' => 'noo',
+    'ヌー' => 'nuu',
+    'ラー' => 'raa',
+    'レー' => 'ree',
+    'リー' => 'rii',
+    'ロー' => 'roo',
+    'ルー' => 'ruu',
+    'サー' => 'saa',
+    'セー' => 'see',
+    'シー' => 'shii',
+    'ソー' => 'soo',
+    'スー' => 'suu',
+    'ザー' => 'zaa',
+    'ゼー' => 'zee',
+    'ジー' => 'jii',
+    'ゾー' => 'zoo',
+    'ズー' => 'zuu',
+    'ター' => 'taa',
+    'テー' => 'tee',
+    'チー' => 'chii',
+    'トー' => 'too',
+    'ツー' => 'tsuu',
+    'ワー' => 'waa',
+    'ヲー' => 'woo',
+    'ヤー' => 'yaa',
+    'ヨー' => 'yoo',
+    'ユー' => 'yuu',
+    'ヵー' => 'kaa',
+    'ヶー' => 'kee',
+    // old characters
+    'ヱー' => 'wee',
+    'ヰー' => 'wii',
+
+    // separate katakana 'n'
+    'ンア' => 'n_a',
+    'ンエ' => 'n_e',
+    'ンイ' => 'n_i',
+    'ンオ' => 'n_o',
+    'ンウ' => 'n_u',
+    'ンヤ' => 'n_ya',
+    'ンヨ' => 'n_yo',
+    'ンユ' => 'n_yu',
+
+    // 2 character syllables - doubled consonants
+    'ッバ' => 'bba',
+    'ッベ' => 'bbe',
+    'ッビ' => 'bbi',
+    'ッボ' => 'bbo',
+    'ッブ' => 'bbu',
+    'ッパ' => 'ppa',
+    'ッペ' => 'ppe',
+    'ッピ' => 'ppi',
+    'ッポ' => 'ppo',
+    'ップ' => 'ppu',
+    'ッケ' => 'kke',
+    'ッキ' => 'kki',
+    'ッコ' => 'kko',
+    'ック' => 'kku',
+    'ッカ' => 'kka',
+    'ッガ' => 'gga',
+    'ッゲ' => 'gge',
+    'ッギ' => 'ggi',
+    'ッゴ' => 'ggo',
+    'ッグ' => 'ggu',
+    'ッマ' => 'ma',
+    'ッメ' => 'me',
+    'ッミ' => 'mi',
+    'ッモ' => 'mo',
+    'ッム' => 'mu',
+    'ッナ' => 'nna',
+    'ッネ' => 'nne',
+    'ッニ' => 'nni',
+    'ッノ' => 'nno',
+    'ッヌ' => 'nnu',
+    'ッラ' => 'rra',
+    'ッレ' => 'rre',
+    'ッリ' => 'rri',
+    'ッロ' => 'rro',
+    'ッル' => 'rru',
+    'ッサ' => 'ssa',
+    'ッセ' => 'sse',
+    'ッシ' => 'sshi',
+    'ッソ' => 'sso',
+    'ッス' => 'ssu',
+    'ッザ' => 'zza',
+    'ッゼ' => 'zze',
+    'ッジ' => 'jji',
+    'ッゾ' => 'zzo',
+    'ッズ' => 'zzu',
+    'ッタ' => 'tta',
+    'ッテ' => 'tte',
+    'ッチ' => 'cchi',
+    'ット' => 'tto',
+    'ッツ' => 'ttsu',
+    'ッダ' => 'dda',
+    'ッデ' => 'dde',
+    'ッヂ' => 'ddi',
+    'ッド' => 'ddo',
+    'ッヅ' => 'ddu',
+
+    // 1 character syllables
+    'ア' => 'a',
+    'エ' => 'e',
+    'イ' => 'i',
+    'オ' => 'o',
+    'ウ' => 'u',
+    'ン' => 'n',
+    'ハ' => 'ha',
+    'ヘ' => 'he',
+    'ヒ' => 'hi',
+    'ホ' => 'ho',
+    'フ' => 'fu',
+    'バ' => 'ba',
+    'ベ' => 'be',
+    'ビ' => 'bi',
+    'ボ' => 'bo',
+    'ブ' => 'bu',
+    'パ' => 'pa',
+    'ペ' => 'pe',
+    'ピ' => 'pi',
+    'ポ' => 'po',
+    'プ' => 'pu',
+    'ケ' => 'ke',
+    'キ' => 'ki',
+    'コ' => 'ko',
+    'ク' => 'ku',
+    'カ' => 'ka',
+    'ガ' => 'ga',
+    'ゲ' => 'ge',
+    'ギ' => 'gi',
+    'ゴ' => 'go',
+    'グ' => 'gu',
+    'マ' => 'ma',
+    'メ' => 'me',
+    'ミ' => 'mi',
+    'モ' => 'mo',
+    'ム' => 'mu',
+    'ナ' => 'na',
+    'ネ' => 'ne',
+    'ニ' => 'ni',
+    'ノ' => 'no',
+    'ヌ' => 'nu',
+    'ラ' => 'ra',
+    'レ' => 're',
+    'リ' => 'ri',
+    'ロ' => 'ro',
+    'ル' => 'ru',
+    'サ' => 'sa',
+    'セ' => 'se',
+    'シ' => 'shi',
+    'ソ' => 'so',
+    'ス' => 'su',
+    'ザ' => 'za',
+    'ゼ' => 'ze',
+    'ジ' => 'ji',
+    'ゾ' => 'zo',
+    'ズ' => 'zu',
+    'タ' => 'ta',
+    'テ' => 'te',
+    'チ' => 'chi',
+    'ト' => 'to',
+    'ツ' => 'tsu',
+    'ダ' => 'da',
+    'デ' => 'de',
+    'ヂ' => 'di',
+    'ド' => 'do',
+    'ヅ' => 'du',
+    'ワ' => 'wa',
+    'ヲ' => 'wo',
+    'ヤ' => 'ya',
+    'ヨ' => 'yo',
+    'ユ' => 'yu',
+    'ヵ' => 'ka',
+    'ヶ' => 'ke',
+    // old characters
+    'ヱ' => 'we',
+    'ヰ' => 'wi',
+
+    //  convert what's left (probably only kicks in when something's missing above)
+    'ァ' => 'a',
+    'ェ' => 'e',
+    'ィ' => 'i',
+    'ォ' => 'o',
+    'ゥ' => 'u',
+    'ャ' => 'ya',
+    'ョ' => 'yo',
+    'ュ' => 'yu',
+
+    // special characters
+    '・' => '_',
+    '、' => '_',
+    'ー' => '_',
+    // when used with hiragana (seldom), this character would not be converted otherwise
+
+    // 'ラ'=>'la',
+    // 'レ'=>'le',
+    // 'リ'=>'li',
+    // 'ロ'=>'lo',
+    // 'ル'=>'lu',
+    // 'チャ'=>'cya',
+    // 'チェ'=>'cye',
+    // 'チィ'=>'cyi',
+    // 'チョ'=>'cyo',
+    // 'チュ'=>'cyu',
+    // 'デャ'=>'dha',
+    // 'デェ'=>'dhe',
+    // 'ディ'=>'dhi',
+    // 'デョ'=>'dho',
+    // 'デュ'=>'dhu',
+    // 'リャ'=>'lya',
+    // 'リェ'=>'lye',
+    // 'リィ'=>'lyi',
+    // 'リョ'=>'lyo',
+    // 'リュ'=>'lyu',
+    // 'テャ'=>'tha',
+    // 'テェ'=>'the',
+    // 'ティ'=>'thi',
+    // 'テョ'=>'tho',
+    // 'テュ'=>'thu',
+    // 'ファ'=>'fwa',
+    // 'フェ'=>'fwe',
+    // 'フィ'=>'fwi',
+    // 'フォ'=>'fwo',
+    // 'フゥ'=>'fwu',
+    // 'チャ'=>'tya',
+    // 'チェ'=>'tye',
+    // 'チィ'=>'tyi',
+    // 'チョ'=>'tyo',
+    // 'チュ'=>'tyu',
+    // 'ジャ'=>'jya',
+    // 'ジェ'=>'jye',
+    // 'ジィ'=>'jyi',
+    // 'ジョ'=>'jyo',
+    // 'ジュ'=>'jyu',
+    // 'ジャ'=>'zha',
+    // 'ジェ'=>'zhe',
+    // 'ジィ'=>'zhi',
+    // 'ジョ'=>'zho',
+    // 'ジュ'=>'zhu',
+    // 'ジャ'=>'zya',
+    // 'ジェ'=>'zye',
+    // 'ジィ'=>'zyi',
+    // 'ジョ'=>'zyo',
+    // 'ジュ'=>'zyu',
+    // 'シャ'=>'sya',
+    // 'シェ'=>'sye',
+    // 'シィ'=>'syi',
+    // 'ショ'=>'syo',
+    // 'シュ'=>'syu',
+    // 'シ'=>'ci',
+    // 'フ'=>'hu',
+    // 'シ'=>'si',
+    // 'チ'=>'ti',
+    // 'ツ'=>'tu',
+    // 'イ'=>'yi',
+    // 'ヂ'=>'dzi',
+
+    // "Greeklish"
+    'Γ' => 'G',
+    'Δ' => 'E',
+    'Θ' => 'Th',
+    'Λ' => 'L',
+    'Ξ' => 'X',
+    'Π' => 'P',
+    'Σ' => 'S',
+    'Φ' => 'F',
+    'Ψ' => 'Ps',
+    'γ' => 'g',
+    'δ' => 'e',
+    'θ' => 'th',
+    'λ' => 'l',
+    'ξ' => 'x',
+    'π' => 'p',
+    'σ' => 's',
+    'φ' => 'f',
+    'ψ' => 'ps',
+
+    // Thai
+    'ก' => 'k',
+    'ข' => 'kh',
+    'ฃ' => 'kh',
+    'ค' => 'kh',
+    'ฅ' => 'kh',
+    'ฆ' => 'kh',
+    'ง' => 'ng',
+    'จ' => 'ch',
+    'ฉ' => 'ch',
+    'ช' => 'ch',
+    'ซ' => 's',
+    'ฌ' => 'ch',
+    'ญ' => 'y',
+    'ฎ' => 'd',
+    'ฏ' => 't',
+    'ฐ' => 'th',
+    'ฑ' => 'd',
+    'ฒ' => 'th',
+    'ณ' => 'n',
+    'ด' => 'd',
+    'ต' => 't',
+    'ถ' => 'th',
+    'ท' => 'th',
+    'ธ' => 'th',
+    'น' => 'n',
+    'บ' => 'b',
+    'ป' => 'p',
+    'ผ' => 'ph',
+    'ฝ' => 'f',
+    'พ' => 'ph',
+    'ฟ' => 'f',
+    'ภ' => 'ph',
+    'ม' => 'm',
+    'ย' => 'y',
+    'ร' => 'r',
+    'ฤ' => 'rue',
+    'ฤๅ' => 'rue',
+    'ล' => 'l',
+    'ฦ' => 'lue',
+    'ฦๅ' => 'lue',
+    'ว' => 'w',
+    'ศ' => 's',
+    'ษ' => 's',
+    'ส' => 's',
+    'ห' => 'h',
+    'ฬ' => 'l',
+    'ฮ' => 'h',
+    'ะ' => 'a',
+    'ั' => 'a',
+    'รร' => 'a',
+    'า' => 'a',
+    'ๅ' => 'a',
+    'ำ' => 'am',
+    'ํา' => 'am',
+    'ิ' => 'i',
+    'ี' => 'i',
+    'ึ' => 'ue',
+    'ี' => 'ue',
+    'ุ' => 'u',
+    'ู' => 'u',
+    'เ' => 'e',
+    'แ' => 'ae',
+    'โ' => 'o',
+    'อ' => 'o',
+    'ียะ' => 'ia',
+    'ีย' => 'ia',
+    'ือะ' => 'uea',
+    'ือ' => 'uea',
+    'ัวะ' => 'ua',
+    'ัว' => 'ua',
+    'ใ' => 'ai',
+    'ไ' => 'ai',
+    'ัย' => 'ai',
+    'าย' => 'ai',
+    'าว' => 'ao',
+    'ุย' => 'ui',
+    'อย' => 'oi',
+    'ือย' => 'ueai',
+    'วย' => 'uai',
+    'ิว' => 'io',
+    '็ว' => 'eo',
+    'ียว' => 'iao',
+    '่' => '',
+    '้' => '',
+    '๊' => '',
+    '๋' => '',
+    '็' => '',
+    '์' => '',
+    '๎' => '',
+    'ํ' => '',
+    'ฺ' => '',
+    'ๆ' => '2',
+    '๏' => 'o',
+    'ฯ' => '-',
+    '๚' => '-',
+    '๛' => '-',
+    '๐' => '0',
+    '๑' => '1',
+    '๒' => '2',
+    '๓' => '3',
+    '๔' => '4',
+    '๕' => '5',
+    '๖' => '6',
+    '๗' => '7',
+    '๘' => '8',
+    '๙' => '9',
+
+    // Korean
+    'ㄱ' => 'k', 'ㅋ' => 'kh',
+    'ㄲ' => 'kk',
+    'ㄷ' => 't',
+    'ㅌ' => 'th',
+    'ㄸ' => 'tt',
+    'ㅂ' => 'p',
+    'ㅍ' => 'ph',
+    'ㅃ' => 'pp',
+    'ㅈ' => 'c',
+    'ㅊ' => 'ch',
+    'ㅉ' => 'cc',
+    'ㅅ' => 's',
+    'ㅆ' => 'ss',
+    'ㅎ' => 'h',
+    'ㅇ' => 'ng',
+    'ㄴ' => 'n',
+    'ㄹ' => 'l',
+    'ㅁ' => 'm',
+    'ㅏ' => 'a',
+    'ㅓ' => 'e',
+    'ㅗ' => 'o',
+    'ㅜ' => 'wu',
+    'ㅡ' => 'u',
+    'ㅣ' => 'i',
+    'ㅐ' => 'ay',
+    'ㅔ' => 'ey',
+    'ㅚ' => 'oy',
+    'ㅘ' => 'wa',
+    'ㅝ' => 'we',
+    'ㅟ' => 'wi',
+    'ㅙ' => 'way',
+    'ㅞ' => 'wey',
+    'ㅢ' => 'uy',
+    'ㅑ' => 'ya',
+    'ㅕ' => 'ye',
+    'ㅛ' => 'oy',
+    'ㅠ' => 'yu',
+    'ㅒ' => 'yay',
+    'ㅖ' => 'yey',
+];
diff --git a/platform/www/inc/Utf8/tables/specials.php b/platform/www/inc/Utf8/tables/specials.php
new file mode 100644
index 0000000..f6243bc
--- /dev/null
+++ b/platform/www/inc/Utf8/tables/specials.php
@@ -0,0 +1,615 @@
+<?php
+/**
+ * UTF-8 array of common special characters
+ *
+ * This array should contain all special characters (not a letter or digit)
+ * defined in the various local charsets - it's not a complete list of non-alphanum
+ * characters in UTF-8. It's not perfect but should match most cases of special
+ * chars.
+ *
+ * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
+ * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::stripspecials()
+ */
+return [ // each entry is an integer code point; the trailing comment shows or names the character
+    0x1a, // SUB (substitute, control character)
+    0x1b, // ESC (escape, control character)
+    0x1c, // FS (file separator, control character)
+    0x1d, // GS (group separator, control character)
+    0x1e, // RS (record separator, control character)
+    0x1f, // US (unit separator, control character)
+    0x20, // <space>
+    0x21, // !
+    0x22, // "
+    0x23, // #
+    0x24, // $
+    0x25, // %
+    0x26, // &
+    0x27, // '
+    0x28, // (
+    0x29, // )
+    0x2b, // +
+    0x2c, // ,
+    0x2f, // /
+    0x3b, // ;
+    0x3c, // <
+    0x3d, // =
+    0x3e, // >
+    0x3f, // ?
+    0x40, // @
+    0x5b, // [
+    0x5c, // \
+    0x5d, // ]
+    0x5e, // ^
+    0x60, // `
+    0x7b, // {
+    0x7c, // |
+    0x7d, // }
+    0x7e, // ~
+    0x7f, // DEL (delete, control character)
+    0x80, // C1 control character
+    0x81, // C1 control character
+    0x82, // C1 control character
+    0x83, // C1 control character
+    0x84, // C1 control character
+    0x85, // C1 control character
+    0x86, // C1 control character
+    0x87, // C1 control character
+    0x88, // C1 control character
+    0x89, // C1 control character
+    0x8a, // C1 control character
+    0x8b, // C1 control character
+    0x8c, // C1 control character
+    0x8d, // C1 control character
+    0x8e, // C1 control character
+    0x8f, // C1 control character
+    0x90, // C1 control character
+    0x91, // C1 control character
+    0x92, // C1 control character
+    0x93, // C1 control character
+    0x94, // C1 control character
+    0x95, // C1 control character
+    0x96, // C1 control character
+    0x97, // C1 control character
+    0x98, // C1 control character
+    0x99, // C1 control character
+    0x9a, // C1 control character
+    0x9b, // C1 control character
+    0x9c, // C1 control character
+    0x9d, // C1 control character
+    0x9e, // C1 control character
+    0x9f, // C1 control character
+    0xa0, // <no-break space>
+    0xa1, // ¡
+    0xa2, // ¢
+    0xa3, // £
+    0xa4, // ¤
+    0xa5, // ¥
+    0xa6, // ¦
+    0xa7, // §
+    0xa8, // ¨
+    0xa9, // ©
+    0xaa, // ª
+    0xab, // «
+    0xac, // ¬
+    0xad, // <soft hyphen>
+    0xae, // ®
+    0xaf, // ¯
+    0xb0, // °
+    0xb1, // ±
+    0xb2, // ²
+    0xb3, // ³
+    0xb4, // ´
+    0xb5, // µ
+    0xb6, // ¶
+    0xb7, // ·
+    0xb8, // ¸
+    0xb9, // ¹
+    0xba, // º
+    0xbb, // »
+    0xbc, // ¼
+    0xbd, // ½
+    0xbe, // ¾
+    0xbf, // ¿
+    0xd7, // ×
+    0xf7, // ÷
+    0x2c7, // ˇ
+    0x2d8, // ˘
+    0x2d9, // ˙
+    0x2da, // ˚
+    0x2db, // ˛
+    0x2dc, // ˜
+    0x2dd, // ˝
+    0x300, // ̀
+    0x301, // ́
+    0x303, // ̃
+    0x309, // ̉
+    0x323, // ̣
+    0x384, // ΄
+    0x385, // ΅
+    0x387, // ·
+    0x5b0, // ְ
+    0x5b1, // ֱ
+    0x5b2, // ֲ
+    0x5b3, // ֳ
+    0x5b4, // ִ
+    0x5b5, // ֵ
+    0x5b6, // ֶ
+    0x5b7, // ַ
+    0x5b8, // ָ
+    0x5b9, // ֹ
+    0x5bb, // ֻ
+    0x5bc, // ּ
+    0x5bd, // ֽ
+    0x5be, // ־
+    0x5bf, // ֿ
+    0x5c0, // ׀
+    0x5c1, // ׁ
+    0x5c2, // ׂ
+    0x5c3, // ׃
+    0x5f3, // ׳
+    0x5f4, // ״
+    0x60c, // ،
+    0x61b, // ؛
+    0x61f, // ؟
+    0x640, // ـ
+    0x64b, // ً
+    0x64c, // ٌ
+    0x64d, // ٍ
+    0x64e, // َ
+    0x64f, // ُ
+    0x650, // ِ
+    0x651, // ّ
+    0x652, // ْ
+    0x66a, // ٪
+    0xe3f, // ฿
+    0x200c, // <zero width non-joiner>
+    0x200d, // <zero width joiner>
+    0x200e, // <left-to-right mark>
+    0x200f, // <right-to-left mark>
+    0x2013, // –
+    0x2014, // —
+    0x2015, // ―
+    0x2017, // ‗
+    0x2018, // ‘
+    0x2019, // ’
+    0x201a, // ‚
+    0x201c, // “
+    0x201d, // ”
+    0x201e, // „
+    0x2020, // †
+    0x2021, // ‡
+    0x2022, // •
+    0x2026, // …
+    0x2030, // ‰
+    0x2032, // ′
+    0x2033, // ″
+    0x2039, // ‹
+    0x203a, // ›
+    0x2044, // ⁄
+    0x20a7, // ₧
+    0x20aa, // ₪
+    0x20ab, // ₫
+    0x20ac, // €
+    0x2116, // №
+    0x2118, // ℘
+    0x2122, // ™
+    0x2126, // Ω
+    0x2135, // ℵ
+    0x2190, // ←
+    0x2191, // ↑
+    0x2192, // →
+    0x2193, // ↓
+    0x2194, // ↔
+    0x2195, // ↕
+    0x21b5, // ↵
+    0x21d0, // ⇐
+    0x21d1, // ⇑
+    0x21d2, // ⇒
+    0x21d3, // ⇓
+    0x21d4, // ⇔
+    0x2200, // ∀
+    0x2202, // ∂
+    0x2203, // ∃
+    0x2205, // ∅
+    0x2206, // ∆
+    0x2207, // ∇
+    0x2208, // ∈
+    0x2209, // ∉
+    0x220b, // ∋
+    0x220f, // ∏
+    0x2211, // ∑
+    0x2212, // −
+    0x2215, // ∕
+    0x2217, // ∗
+    0x2219, // ∙
+    0x221a, // √
+    0x221d, // ∝
+    0x221e, // ∞
+    0x2220, // ∠
+    0x2227, // ∧
+    0x2228, // ∨
+    0x2229, // ∩
+    0x222a, // ∪
+    0x222b, // ∫
+    0x2234, // ∴
+    0x223c, // ∼
+    0x2245, // ≅
+    0x2248, // ≈
+    0x2260, // ≠
+    0x2261, // ≡
+    0x2264, // ≤
+    0x2265, // ≥
+    0x2282, // ⊂
+    0x2283, // ⊃
+    0x2284, // ⊄
+    0x2286, // ⊆
+    0x2287, // ⊇
+    0x2295, // ⊕
+    0x2297, // ⊗
+    0x22a5, // ⊥
+    0x22c5, // ⋅
+    0x2310, // ⌐
+    0x2320, // ⌠
+    0x2321, // ⌡
+    0x2329, // 〈
+    0x232a, // 〉
+    0x2469, // ⑩
+    0x2500, // ─
+    0x2502, // │
+    0x250c, // ┌
+    0x2510, // ┐
+    0x2514, // └
+    0x2518, // ┘
+    0x251c, // ├
+    0x2524, // ┤
+    0x252c, // ┬
+    0x2534, // ┴
+    0x253c, // ┼
+    0x2550, // ═
+    0x2551, // ║
+    0x2552, // ╒
+    0x2553, // ╓
+    0x2554, // ╔
+    0x2555, // ╕
+    0x2556, // ╖
+    0x2557, // ╗
+    0x2558, // ╘
+    0x2559, // ╙
+    0x255a, // ╚
+    0x255b, // ╛
+    0x255c, // ╜
+    0x255d, // ╝
+    0x255e, // ╞
+    0x255f, // ╟
+    0x2560, // ╠
+    0x2561, // ╡
+    0x2562, // ╢
+    0x2563, // ╣
+    0x2564, // ╤
+    0x2565, // ╥
+    0x2566, // ╦
+    0x2567, // ╧
+    0x2568, // ╨
+    0x2569, // ╩
+    0x256a, // ╪
+    0x256b, // ╫
+    0x256c, // ╬
+    0x2580, // ▀
+    0x2584, // ▄
+    0x2588, // █
+    0x258c, // ▌
+    0x2590, // ▐
+    0x2591, // ░
+    0x2592, // ▒
+    0x2593, // ▓
+    0x25a0, // ■
+    0x25b2, // ▲
+    0x25bc, // ▼
+    0x25c6, // ◆
+    0x25ca, // ◊
+    0x25cf, // ●
+    0x25d7, // ◗
+    0x2605, // ★
+    0x260e, // ☎
+    0x261b, // ☛
+    0x261e, // ☞
+    0x2660, // ♠
+    0x2663, // ♣
+    0x2665, // ♥
+    0x2666, // ♦
+    0x2701, // ✁
+    0x2702, // ✂
+    0x2703, // ✃
+    0x2704, // ✄
+    0x2706, // ✆
+    0x2707, // ✇
+    0x2708, // ✈
+    0x2709, // ✉
+    0x270c, // ✌
+    0x270d, // ✍
+    0x270e, // ✎
+    0x270f, // ✏
+    0x2710, // ✐
+    0x2711, // ✑
+    0x2712, // ✒
+    0x2713, // ✓
+    0x2714, // ✔
+    0x2715, // ✕
+    0x2716, // ✖
+    0x2717, // ✗
+    0x2718, // ✘
+    0x2719, // ✙
+    0x271a, // ✚
+    0x271b, // ✛
+    0x271c, // ✜
+    0x271d, // ✝
+    0x271e, // ✞
+    0x271f, // ✟
+    0x2720, // ✠
+    0x2721, // ✡
+    0x2722, // ✢
+    0x2723, // ✣
+    0x2724, // ✤
+    0x2725, // ✥
+    0x2726, // ✦
+    0x2727, // ✧
+    0x2729, // ✩
+    0x272a, // ✪
+    0x272b, // ✫
+    0x272c, // ✬
+    0x272d, // ✭
+    0x272e, // ✮
+    0x272f, // ✯
+    0x2730, // ✰
+    0x2731, // ✱
+    0x2732, // ✲
+    0x2733, // ✳
+    0x2734, // ✴
+    0x2735, // ✵
+    0x2736, // ✶
+    0x2737, // ✷
+    0x2738, // ✸
+    0x2739, // ✹
+    0x273a, // ✺
+    0x273b, // ✻
+    0x273c, // ✼
+    0x273d, // ✽
+    0x273e, // ✾
+    0x273f, // ✿
+    0x2740, // ❀
+    0x2741, // ❁
+    0x2742, // ❂
+    0x2743, // ❃
+    0x2744, // ❄
+    0x2745, // ❅
+    0x2746, // ❆
+    0x2747, // ❇
+    0x2748, // ❈
+    0x2749, // ❉
+    0x274a, // ❊
+    0x274b, // ❋
+    0x274d, // ❍
+    0x274f, // ❏
+    0x2750, // ❐
+    0x2751, // ❑
+    0x2752, // ❒
+    0x2756, // ❖
+    0x2758, // ❘
+    0x2759, // ❙
+    0x275a, // ❚
+    0x275b, // ❛
+    0x275c, // ❜
+    0x275d, // ❝
+    0x275e, // ❞
+    0x2761, // ❡
+    0x2762, // ❢
+    0x2763, // ❣
+    0x2764, // ❤
+    0x2765, // ❥
+    0x2766, // ❦
+    0x2767, // ❧
+    0x277f, // ❿
+    0x2789, // ➉
+    0x2793, // ➓
+    0x2794, // ➔
+    0x2798, // ➘
+    0x2799, // ➙
+    0x279a, // ➚
+    0x279b, // ➛
+    0x279c, // ➜
+    0x279d, // ➝
+    0x279e, // ➞
+    0x279f, // ➟
+    0x27a0, // ➠
+    0x27a1, // ➡
+    0x27a2, // ➢
+    0x27a3, // ➣
+    0x27a4, // ➤
+    0x27a5, // ➥
+    0x27a6, // ➦
+    0x27a7, // ➧
+    0x27a8, // ➨
+    0x27a9, // ➩
+    0x27aa, // ➪
+    0x27ab, // ➫
+    0x27ac, // ➬
+    0x27ad, // ➭
+    0x27ae, // ➮
+    0x27af, // ➯
+    0x27b1, // ➱
+    0x27b2, // ➲
+    0x27b3, // ➳
+    0x27b4, // ➴
+    0x27b5, // ➵
+    0x27b6, // ➶
+    0x27b7, // ➷
+    0x27b8, // ➸
+    0x27b9, // ➹
+    0x27ba, // ➺
+    0x27bb, // ➻
+    0x27bc, // ➼
+    0x27bd, // ➽
+    0x27be, // ➾
+    0x3000, // <ideographic space>
+    0x3001, // 、
+    0x3002, // 。
+    0x3003, // 〃
+    0x3008, // 〈
+    0x3009, // 〉
+    0x300a, // 《
+    0x300b, // 》
+    0x300c, // 「
+    0x300d, // 」
+    0x300e, // 『
+    0x300f, // 』
+    0x3010, // 【
+    0x3011, // 】
+    0x3012, // 〒
+    0x3014, // 〔
+    0x3015, // 〕
+    0x3016, // 〖
+    0x3017, // 〗
+    0x3018, // 〘
+    0x3019, // 〙
+    0x301a, // 〚
+    0x301b, // 〛
+    0x3036, // 〶
+    0xf6d9, // <private use area>
+    0xf6da, // <private use area>
+    0xf6db, // <private use area>
+    0xf8d7, // <private use area>
+    0xf8d8, // <private use area>
+    0xf8d9, // <private use area>
+    0xf8da, // <private use area>
+    0xf8db, // <private use area>
+    0xf8dc, // <private use area>
+    0xf8dd, // <private use area>
+    0xf8de, // <private use area>
+    0xf8df, // <private use area>
+    0xf8e0, // <private use area>
+    0xf8e1, // <private use area>
+    0xf8e2, // <private use area>
+    0xf8e3, // <private use area>
+    0xf8e4, // <private use area>
+    0xf8e5, // <private use area>
+    0xf8e6, // <private use area>
+    0xf8e7, // <private use area>
+    0xf8e8, // <private use area>
+    0xf8e9, // <private use area>
+    0xf8ea, // <private use area>
+    0xf8eb, // <private use area>
+    0xf8ec, // <private use area>
+    0xf8ed, // <private use area>
+    0xf8ee, // <private use area>
+    0xf8ef, // <private use area>
+    0xf8f0, // <private use area>
+    0xf8f1, // <private use area>
+    0xf8f2, // <private use area>
+    0xf8f3, // <private use area>
+    0xf8f4, // <private use area>
+    0xf8f5, // <private use area>
+    0xf8f6, // <private use area>
+    0xf8f7, // <private use area>
+    0xf8f8, // <private use area>
+    0xf8f9, // <private use area>
+    0xf8fa, // <private use area>
+    0xf8fb, // <private use area>
+    0xf8fc, // <private use area>
+    0xf8fd, // <private use area>
+    0xf8fe, // <private use area>
+    0xfe7c, // ﹼ
+    0xfe7d, // ﹽ
+    0xff01, // ！
+    0xff02, // ＂
+    0xff03, // ＃
+    0xff04, // ＄
+    0xff05, // ％
+    0xff06, // ＆
+    0xff07, // ＇
+    0xff08, // （
+    0xff09, // ）
+    0xff09, // ） (duplicate of the previous entry)
+    0xff0a, // ＊
+    0xff0b, // ＋
+    0xff0c, // ，
+    0xff0d, // －
+    0xff0e, // ．
+    0xff0f, // ／
+    0xff1a, // ：
+    0xff1b, // ；
+    0xff1c, // ＜
+    0xff1d, // ＝
+    0xff1e, // ＞
+    0xff1f, // ？
+    0xff20, // ＠
+    0xff3b, // ［
+    0xff3c, // ＼
+    0xff3d, // ］
+    0xff3e, // ＾
+    0xff40, // ｀
+    0xff5b, // ｛
+    0xff5c, // ｜
+    0xff5d, // ｝
+    0xff5e, // ～
+    0xff5f, // ｟
+    0xff60, // ｠
+    0xff61, // ｡
+    0xff62, // ｢
+    0xff63, // ｣
+    0xff64, // ､
+    0xff65, // ･
+    0xffe0, // ￠
+    0xffe1, // ￡
+    0xffe2, // ￢
+    0xffe3, // ￣
+    0xffe4, // ￤
+    0xffe5, // ￥
+    0xffe6, // ￦
+    0xffe8, // ￨
+    0xffe9, // ￩
+    0xffea, // ￪
+    0xffeb, // ￫
+    0xffec, // ￬
+    0xffed, // ￭
+    0xffee, // ￮
+    0x1d6fc, // 𝛼
+    0x1d6fd, // 𝛽
+    0x1d6fe, // 𝛾
+    0x1d6ff, // 𝛿
+    0x1d700, // 𝜀
+    0x1d701, // 𝜁
+    0x1d702, // 𝜂
+    0x1d703, // 𝜃
+    0x1d704, // 𝜄
+    0x1d705, // 𝜅
+    0x1d706, // 𝜆
+    0x1d707, // 𝜇
+    0x1d708, // 𝜈
+    0x1d709, // 𝜉
+    0x1d70a, // 𝜊
+    0x1d70b, // 𝜋
+    0x1d70c, // 𝜌
+    0x1d70d, // 𝜍
+    0x1d70e, // 𝜎
+    0x1d70f, // 𝜏
+    0x1d710, // 𝜐
+    0x1d711, // 𝜑
+    0x1d712, // 𝜒
+    0x1d713, // 𝜓
+    0x1d714, // 𝜔
+    0x1d715, // 𝜕
+    0x1d716, // 𝜖
+    0x1d717, // 𝜗
+    0x1d718, // 𝜘
+    0x1d719, // 𝜙
+    0x1d71a, // 𝜚
+    0x1d71b, // 𝜛
+    0xc2a0, // 슠 - NOTE(review): C2 A0 is the UTF-8 byte sequence of U+00A0; as a code point this is a Hangul syllable - confirm intent
+    0xe28087, // NOTE(review): above U+10FFFF; looks like the UTF-8 bytes E2 80 87 of U+2007 (figure space) - confirm intent
+    0xe280af, // NOTE(review): above U+10FFFF; looks like the UTF-8 bytes E2 80 AF of U+202F (narrow no-break space) - confirm intent
+    0xe281a0, // NOTE(review): above U+10FFFF; looks like the UTF-8 bytes E2 81 A0 of U+2060 (word joiner) - confirm intent
+    0xefbbbf, // NOTE(review): above U+10FFFF; looks like the UTF-8 bytes EF BB BF of U+FEFF (byte order mark) - confirm intent
+];
diff --git a/platform/www/inc/Utf8/tables/upperaccents.php b/platform/www/inc/Utf8/tables/upperaccents.php
new file mode 100644
index 0000000..e6e48de
--- /dev/null
+++ b/platform/www/inc/Utf8/tables/upperaccents.php
@@ -0,0 +1,114 @@
+<?php
+/**
+ * UTF-8 lookup table for upper case accented letters
+ *
+ * This lookup table defines replacements from the ASCII-7 range for accented
+ * characters. These are upper case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [ // key: accented upper case letter, value: its plain ASCII replacement (one or two letters)
+    'Á' => 'A',
+    'À' => 'A',
+    'Ă' => 'A',
+    'Â' => 'A',
+    'Å' => 'A',
+    'Ä' => 'Ae', // two-letter replacement
+    'Ã' => 'A',
+    'Ą' => 'A',
+    'Ā' => 'A',
+    'Æ' => 'Ae', // ligature, two-letter replacement
+    'Ḃ' => 'B',
+    'Ć' => 'C',
+    'Ĉ' => 'C',
+    'Č' => 'C',
+    'Ċ' => 'C',
+    'Ç' => 'C',
+    'Ď' => 'D',
+    'Ḋ' => 'D',
+    'Đ' => 'D',
+    'Ð' => 'Dh', // two-letter replacement
+    'É' => 'E',
+    'È' => 'E',
+    'Ĕ' => 'E',
+    'Ê' => 'E',
+    'Ě' => 'E',
+    'Ë' => 'E',
+    'Ė' => 'E',
+    'Ę' => 'E',
+    'Ē' => 'E',
+    'Ḟ' => 'F',
+    'Ƒ' => 'F',
+    'Ğ' => 'G',
+    'Ĝ' => 'G',
+    'Ġ' => 'G',
+    'Ģ' => 'G',
+    'Ĥ' => 'H',
+    'Ħ' => 'H',
+    'Í' => 'I',
+    'Ì' => 'I',
+    'Î' => 'I',
+    'Ï' => 'I',
+    'Ĩ' => 'I',
+    'Į' => 'I',
+    'Ī' => 'I',
+    'Ĵ' => 'J',
+    'Ķ' => 'K',
+    'Ĺ' => 'L',
+    'Ľ' => 'L',
+    'Ļ' => 'L',
+    'Ł' => 'L',
+    'Ṁ' => 'M',
+    'Ń' => 'N',
+    'Ň' => 'N',
+    'Ñ' => 'N',
+    'Ņ' => 'N',
+    'Ó' => 'O',
+    'Ò' => 'O',
+    'Ô' => 'O',
+    'Ö' => 'Oe', // two-letter replacement
+    'Ő' => 'O',
+    'Õ' => 'O',
+    'Ø' => 'O',
+    'Ō' => 'O',
+    'Ơ' => 'O',
+    'Ṗ' => 'P',
+    'Ŕ' => 'R',
+    'Ř' => 'R',
+    'Ŗ' => 'R',
+    'Ś' => 'S',
+    'Ŝ' => 'S',
+    'Š' => 'S',
+    'Ṡ' => 'S',
+    'Ş' => 'S',
+    'Ș' => 'S', // NOTE(review): not a duplicate of the previous key; presumably the comma-below form vs. the cedilla form - confirm
+    'Ť' => 'T',
+    'Ṫ' => 'T',
+    'Ţ' => 'T',
+    'Ț' => 'T', // NOTE(review): not a duplicate of the previous key; presumably the comma-below form vs. the cedilla form - confirm
+    'Ŧ' => 'T',
+    'Ú' => 'U',
+    'Ù' => 'U',
+    'Ŭ' => 'U',
+    'Û' => 'U',
+    'Ů' => 'U',
+    'Ü' => 'Ue', // two-letter replacement
+    'Ű' => 'U',
+    'Ũ' => 'U',
+    'Ų' => 'U',
+    'Ū' => 'U',
+    'Ư' => 'U',
+    'Ẃ' => 'W',
+    'Ẁ' => 'W',
+    'Ŵ' => 'W',
+    'Ẅ' => 'W',
+    'Ý' => 'Y',
+    'Ỳ' => 'Y',
+    'Ŷ' => 'Y',
+    'Ÿ' => 'Y',
+    'Ź' => 'Z',
+    'Ž' => 'Z',
+    'Ż' => 'Z',
+    'Þ' => 'Th', // two-letter replacement
+];