diff options
Diffstat (limited to 'platform/www/inc/Utf8/tables')
-rw-r--r-- | platform/www/inc/Utf8/tables/case.php | 659 | ||||
-rw-r--r-- | platform/www/inc/Utf8/tables/loweraccents.php | 116 | ||||
-rw-r--r-- | platform/www/inc/Utf8/tables/romanization.php | 1458 | ||||
-rw-r--r-- | platform/www/inc/Utf8/tables/specials.php | 615 | ||||
-rw-r--r-- | platform/www/inc/Utf8/tables/upperaccents.php | 114 |
5 files changed, 2962 insertions, 0 deletions
diff --git a/platform/www/inc/Utf8/tables/case.php b/platform/www/inc/Utf8/tables/case.php new file mode 100644 index 0000000..6c41b58 --- /dev/null +++ b/platform/www/inc/Utf8/tables/case.php @@ -0,0 +1,659 @@ +<?php +/** + * UTF-8 Case lookup table + * + * This lookup table maps upper case letters (keys) to their corresponding + * lower case letters (values) in UTF-8 + * + * @author Andreas Gohr <andi@splitbrain.org> + */ +return [ + 'A' => 'a', + 'B' => 'b', + 'C' => 'c', + 'D' => 'd', + 'E' => 'e', + 'F' => 'f', + 'G' => 'g', + 'H' => 'h', + 'I' => 'i', + 'J' => 'j', + 'K' => 'k', + 'L' => 'l', + 'M' => 'm', + 'N' => 'n', + 'O' => 'o', + 'P' => 'p', + 'Q' => 'q', + 'R' => 'r', + 'S' => 's', + 'T' => 't', + 'U' => 'u', + 'V' => 'v', + 'W' => 'w', + 'X' => 'x', + 'Y' => 'y', + 'Z' => 'z', + 'À' => 'à', + 'Á' => 'á', + 'Â' => 'â', + 'Ã' => 'ã', + 'Ä' => 'ä', + 'Å' => 'å', + 'Æ' => 'æ', + 'Ç' => 'ç', + 'È' => 'è', + 'É' => 'é', + 'Ê' => 'ê', + 'Ë' => 'ë', + 'Ì' => 'ì', + 'Í' => 'í', + 'Î' => 'î', + 'Ï' => 'ï', + 'Ð' => 'ð', + 'Ñ' => 'ñ', + 'Ò' => 'ò', + 'Ó' => 'ó', + 'Ô' => 'ô', + 'Õ' => 'õ', + 'Ö' => 'ö', + 'Ø' => 'ø', + 'Ù' => 'ù', + 'Ú' => 'ú', + 'Û' => 'û', + 'Ü' => 'ü', + 'Ý' => 'ý', + 'Þ' => 'þ', + 'Ā' => 'ā', + 'Ă' => 'ă', + 'Ą' => 'ą', + 'Ć' => 'ć', + 'Ĉ' => 'ĉ', + 'Ċ' => 'ċ', + 'Č' => 'č', + 'Ď' => 'ď', + 'Đ' => 'đ', + 'Ē' => 'ē', + 'Ĕ' => 'ĕ', + 'Ė' => 'ė', + 'Ę' => 'ę', + 'Ě' => 'ě', + 'Ĝ' => 'ĝ', + 'Ğ' => 'ğ', + 'Ġ' => 'ġ', + 'Ģ' => 'ģ', + 'Ĥ' => 'ĥ', + 'Ħ' => 'ħ', + 'Ĩ' => 'ĩ', + 'Ī' => 'ī', + 'Ĭ' => 'ĭ', + 'Į' => 'į', + 'IJ' => 'ij', + 'Ĵ' => 'ĵ', + 'Ķ' => 'ķ', + 'Ĺ' => 'ĺ', + 'Ļ' => 'ļ', + 'Ľ' => 'ľ', + 'Ŀ' => 'ŀ', + 'Ł' => 'ł', + 'Ń' => 'ń', + 'Ņ' => 'ņ', + 'Ň' => 'ň', + 'Ŋ' => 'ŋ', + 'Ō' => 'ō', + 'Ŏ' => 'ŏ', + 'Ő' => 'ő', + 'Œ' => 'œ', + 'Ŕ' => 'ŕ', + 'Ŗ' => 'ŗ', + 'Ř' => 'ř', + 'Ś' => 'ś', + 'Ŝ' => 'ŝ', + 'Ş' => 'ş', + 'Š' => 'š', + 'Ţ' => 'ţ', + 'Ť' => 'ť', + 'Ŧ' => 'ŧ', + 'Ũ' => 'ũ', + 'Ū' => 'ū', + 'Ŭ' => 'ŭ', + 'Ů' => 'ů', + 'Ű' => 'ű', + 'Ų' => 'ų', + 
'Ŵ' => 'ŵ', + 'Ŷ' => 'ŷ', + 'Ÿ' => 'ÿ', + 'Ź' => 'ź', + 'Ż' => 'ż', + 'Ž' => 'ž', + 'Ɓ' => 'ɓ', + 'Ƃ' => 'ƃ', + 'Ƅ' => 'ƅ', + 'Ɔ' => 'ɔ', + 'Ƈ' => 'ƈ', + 'Ɖ' => 'ɖ', + 'Ɗ' => 'ɗ', + 'Ƌ' => 'ƌ', + 'Ǝ' => 'ǝ', + 'Ə' => 'ə', + 'Ɛ' => 'ɛ', + 'Ƒ' => 'ƒ', + 'Ɣ' => 'ɣ', + 'Ɩ' => 'ɩ', + 'Ɨ' => 'ɨ', + 'Ƙ' => 'ƙ', + 'Ɯ' => 'ɯ', + 'Ɲ' => 'ɲ', + 'Ɵ' => 'ɵ', + 'Ơ' => 'ơ', + 'Ƣ' => 'ƣ', + 'Ƥ' => 'ƥ', + 'Ʀ' => 'ʀ', + 'Ƨ' => 'ƨ', + 'Ʃ' => 'ʃ', + 'Ƭ' => 'ƭ', + 'Ʈ' => 'ʈ', + 'Ư' => 'ư', + 'Ʊ' => 'ʊ', + 'Ʋ' => 'ʋ', + 'Ƴ' => 'ƴ', + 'Ƶ' => 'ƶ', + 'Ʒ' => 'ʒ', + 'Ƹ' => 'ƹ', + 'Ƽ' => 'ƽ', + 'Dž' => 'dž', + 'Lj' => 'lj', + 'Nj' => 'nj', + 'Ǎ' => 'ǎ', + 'Ǐ' => 'ǐ', + 'Ǒ' => 'ǒ', + 'Ǔ' => 'ǔ', + 'Ǖ' => 'ǖ', + 'Ǘ' => 'ǘ', + 'Ǚ' => 'ǚ', + 'Ǜ' => 'ǜ', + 'Ǟ' => 'ǟ', + 'Ǡ' => 'ǡ', + 'Ǣ' => 'ǣ', + 'Ǥ' => 'ǥ', + 'Ǧ' => 'ǧ', + 'Ǩ' => 'ǩ', + 'Ǫ' => 'ǫ', + 'Ǭ' => 'ǭ', + 'Ǯ' => 'ǯ', + 'Dz' => 'dz', + 'Ǵ' => 'ǵ', + 'Ƕ' => 'ƕ', + 'Ƿ' => 'ƿ', + 'Ǹ' => 'ǹ', + 'Ǻ' => 'ǻ', + 'Ǽ' => 'ǽ', + 'Ǿ' => 'ǿ', + 'Ȁ' => 'ȁ', + 'Ȃ' => 'ȃ', + 'Ȅ' => 'ȅ', + 'Ȇ' => 'ȇ', + 'Ȉ' => 'ȉ', + 'Ȋ' => 'ȋ', + 'Ȍ' => 'ȍ', + 'Ȏ' => 'ȏ', + 'Ȑ' => 'ȑ', + 'Ȓ' => 'ȓ', + 'Ȕ' => 'ȕ', + 'Ȗ' => 'ȗ', + 'Ș' => 'ș', + 'Ț' => 'ț', + 'Ȝ' => 'ȝ', + 'Ȟ' => 'ȟ', + 'Ƞ' => 'ƞ', + 'Ȣ' => 'ȣ', + 'Ȥ' => 'ȥ', + 'Ȧ' => 'ȧ', + 'Ȩ' => 'ȩ', + 'Ȫ' => 'ȫ', + 'Ȭ' => 'ȭ', + 'Ȯ' => 'ȯ', + 'Ȱ' => 'ȱ', + 'Ȳ' => 'ȳ', + 'Ά' => 'ά', + 'Έ' => 'έ', + 'Ή' => 'ή', + 'Ί' => 'ί', + 'Ό' => 'ό', + 'Ύ' => 'ύ', + 'Ώ' => 'ώ', + 'Α' => 'α', + 'Β' => 'β', + 'Γ' => 'γ', + 'Δ' => 'δ', + 'Ε' => 'ε', + 'Ζ' => 'ζ', + 'Η' => 'η', + 'Θ' => 'θ', + 'Ι' => 'ι', + 'Κ' => 'κ', + 'Λ' => 'λ', + 'Μ' => 'μ', + 'Ν' => 'ν', + 'Ξ' => 'ξ', + 'Ο' => 'ο', + 'Π' => 'π', + 'Ρ' => 'ρ', + 'Σ' => 'σ', + 'Τ' => 'τ', + 'Υ' => 'υ', + 'Φ' => 'φ', + 'Χ' => 'χ', + 'Ψ' => 'ψ', + 'Ω' => 'ω', + 'Ϊ' => 'ϊ', + 'Ϋ' => 'ϋ', + 'Ϙ' => 'ϙ', + 'Ϛ' => 'ϛ', + 'Ϝ' => 'ϝ', + 'Ϟ' => 'ϟ', + 'Ϡ' => 'ϡ', + 'Ϣ' => 'ϣ', + 'Ϥ' => 'ϥ', + 'Ϧ' => 'ϧ', + 'Ϩ' => 'ϩ', + 'Ϫ' => 'ϫ', + 'Ϭ' => 'ϭ', + 'Ϯ' => 'ϯ', + 'Ѐ' => 'ѐ', + 'Ё' => 'ё', + 'Ђ' 
=> 'ђ', + 'Ѓ' => 'ѓ', + 'Є' => 'є', + 'Ѕ' => 'ѕ', + 'І' => 'і', + 'Ї' => 'ї', + 'Ј' => 'ј', + 'Љ' => 'љ', + 'Њ' => 'њ', + 'Ћ' => 'ћ', + 'Ќ' => 'ќ', + 'Ѝ' => 'ѝ', + 'Ў' => 'ў', + 'Џ' => 'џ', + 'А' => 'а', + 'Б' => 'б', + 'В' => 'в', + 'Г' => 'г', + 'Д' => 'д', + 'Е' => 'е', + 'Ж' => 'ж', + 'З' => 'з', + 'И' => 'и', + 'Й' => 'й', + 'К' => 'к', + 'Л' => 'л', + 'М' => 'м', + 'Н' => 'н', + 'О' => 'о', + 'П' => 'п', + 'Р' => 'р', + 'С' => 'с', + 'Т' => 'т', + 'У' => 'у', + 'Ф' => 'ф', + 'Х' => 'х', + 'Ц' => 'ц', + 'Ч' => 'ч', + 'Ш' => 'ш', + 'Щ' => 'щ', + 'Ъ' => 'ъ', + 'Ы' => 'ы', + 'Ь' => 'ь', + 'Э' => 'э', + 'Ю' => 'ю', + 'Я' => 'я', + 'Ѡ' => 'ѡ', + 'Ѣ' => 'ѣ', + 'Ѥ' => 'ѥ', + 'Ѧ' => 'ѧ', + 'Ѩ' => 'ѩ', + 'Ѫ' => 'ѫ', + 'Ѭ' => 'ѭ', + 'Ѯ' => 'ѯ', + 'Ѱ' => 'ѱ', + 'Ѳ' => 'ѳ', + 'Ѵ' => 'ѵ', + 'Ѷ' => 'ѷ', + 'Ѹ' => 'ѹ', + 'Ѻ' => 'ѻ', + 'Ѽ' => 'ѽ', + 'Ѿ' => 'ѿ', + 'Ҁ' => 'ҁ', + 'Ҋ' => 'ҋ', + 'Ҍ' => 'ҍ', + 'Ҏ' => 'ҏ', + 'Ґ' => 'ґ', + 'Ғ' => 'ғ', + 'Ҕ' => 'ҕ', + 'Җ' => 'җ', + 'Ҙ' => 'ҙ', + 'Қ' => 'қ', + 'Ҝ' => 'ҝ', + 'Ҟ' => 'ҟ', + 'Ҡ' => 'ҡ', + 'Ң' => 'ң', + 'Ҥ' => 'ҥ', + 'Ҧ' => 'ҧ', + 'Ҩ' => 'ҩ', + 'Ҫ' => 'ҫ', + 'Ҭ' => 'ҭ', + 'Ү' => 'ү', + 'Ұ' => 'ұ', + 'Ҳ' => 'ҳ', + 'Ҵ' => 'ҵ', + 'Ҷ' => 'ҷ', + 'Ҹ' => 'ҹ', + 'Һ' => 'һ', + 'Ҽ' => 'ҽ', + 'Ҿ' => 'ҿ', + 'Ӂ' => 'ӂ', + 'Ӄ' => 'ӄ', + 'Ӆ' => 'ӆ', + 'Ӈ' => 'ӈ', + 'Ӊ' => 'ӊ', + 'Ӌ' => 'ӌ', + 'Ӎ' => 'ӎ', + 'Ӑ' => 'ӑ', + 'Ӓ' => 'ӓ', + 'Ӕ' => 'ӕ', + 'Ӗ' => 'ӗ', + 'Ә' => 'ә', + 'Ӛ' => 'ӛ', + 'Ӝ' => 'ӝ', + 'Ӟ' => 'ӟ', + 'Ӡ' => 'ӡ', + 'Ӣ' => 'ӣ', + 'Ӥ' => 'ӥ', + 'Ӧ' => 'ӧ', + 'Ө' => 'ө', + 'Ӫ' => 'ӫ', + 'Ӭ' => 'ӭ', + 'Ӯ' => 'ӯ', + 'Ӱ' => 'ӱ', + 'Ӳ' => 'ӳ', + 'Ӵ' => 'ӵ', + 'Ӹ' => 'ӹ', + 'Ԁ' => 'ԁ', + 'Ԃ' => 'ԃ', + 'Ԅ' => 'ԅ', + 'Ԇ' => 'ԇ', + 'Ԉ' => 'ԉ', + 'Ԋ' => 'ԋ', + 'Ԍ' => 'ԍ', + 'Ԏ' => 'ԏ', + 'Ա' => 'ա', + 'Բ' => 'բ', + 'Գ' => 'գ', + 'Դ' => 'դ', + 'Ե' => 'ե', + 'Զ' => 'զ', + 'Է' => 'է', + 'Ը' => 'ը', + 'Թ' => 'թ', + 'Ժ' => 'ժ', + 'Ի' => 'ի', + 'Լ' => 'լ', + 'Խ' => 'խ', + 'Ծ' => 'ծ', + 'Կ' => 'կ', + 'Հ' => 'հ', + 'Ձ' => 'ձ', + 'Ղ' => 'ղ', + 
'Ճ' => 'ճ', + 'Մ' => 'մ', + 'Յ' => 'յ', + 'Ն' => 'ն', + 'Շ' => 'շ', + 'Ո' => 'ո', + 'Չ' => 'չ', + 'Պ' => 'պ', + 'Ջ' => 'ջ', + 'Ռ' => 'ռ', + 'Ս' => 'ս', + 'Վ' => 'վ', + 'Տ' => 'տ', + 'Ր' => 'ր', + 'Ց' => 'ց', + 'Ւ' => 'ւ', + 'Փ' => 'փ', + 'Ք' => 'ք', + 'Օ' => 'օ', + 'Ֆ' => 'ֆ', + 'Ḁ' => 'ḁ', + 'Ḃ' => 'ḃ', + 'Ḅ' => 'ḅ', + 'Ḇ' => 'ḇ', + 'Ḉ' => 'ḉ', + 'Ḋ' => 'ḋ', + 'Ḍ' => 'ḍ', + 'Ḏ' => 'ḏ', + 'Ḑ' => 'ḑ', + 'Ḓ' => 'ḓ', + 'Ḕ' => 'ḕ', + 'Ḗ' => 'ḗ', + 'Ḙ' => 'ḙ', + 'Ḛ' => 'ḛ', + 'Ḝ' => 'ḝ', + 'Ḟ' => 'ḟ', + 'Ḡ' => 'ḡ', + 'Ḣ' => 'ḣ', + 'Ḥ' => 'ḥ', + 'Ḧ' => 'ḧ', + 'Ḩ' => 'ḩ', + 'Ḫ' => 'ḫ', + 'Ḭ' => 'ḭ', + 'Ḯ' => 'ḯ', + 'Ḱ' => 'ḱ', + 'Ḳ' => 'ḳ', + 'Ḵ' => 'ḵ', + 'Ḷ' => 'ḷ', + 'Ḹ' => 'ḹ', + 'Ḻ' => 'ḻ', + 'Ḽ' => 'ḽ', + 'Ḿ' => 'ḿ', + 'Ṁ' => 'ṁ', + 'Ṃ' => 'ṃ', + 'Ṅ' => 'ṅ', + 'Ṇ' => 'ṇ', + 'Ṉ' => 'ṉ', + 'Ṋ' => 'ṋ', + 'Ṍ' => 'ṍ', + 'Ṏ' => 'ṏ', + 'Ṑ' => 'ṑ', + 'Ṓ' => 'ṓ', + 'Ṕ' => 'ṕ', + 'Ṗ' => 'ṗ', + 'Ṙ' => 'ṙ', + 'Ṛ' => 'ṛ', + 'Ṝ' => 'ṝ', + 'Ṟ' => 'ṟ', + 'Ṡ' => 'ṡ', + 'Ṣ' => 'ṣ', + 'Ṥ' => 'ṥ', + 'Ṧ' => 'ṧ', + 'Ṩ' => 'ṩ', + 'Ṫ' => 'ṫ', + 'Ṭ' => 'ṭ', + 'Ṯ' => 'ṯ', + 'Ṱ' => 'ṱ', + 'Ṳ' => 'ṳ', + 'Ṵ' => 'ṵ', + 'Ṷ' => 'ṷ', + 'Ṹ' => 'ṹ', + 'Ṻ' => 'ṻ', + 'Ṽ' => 'ṽ', + 'Ṿ' => 'ṿ', + 'Ẁ' => 'ẁ', + 'Ẃ' => 'ẃ', + 'Ẅ' => 'ẅ', + 'Ẇ' => 'ẇ', + 'Ẉ' => 'ẉ', + 'Ẋ' => 'ẋ', + 'Ẍ' => 'ẍ', + 'Ẏ' => 'ẏ', + 'Ẑ' => 'ẑ', + 'Ẓ' => 'ẓ', + 'Ẕ' => 'ẕ', + 'Ạ' => 'ạ', + 'Ả' => 'ả', + 'Ấ' => 'ấ', + 'Ầ' => 'ầ', + 'Ẩ' => 'ẩ', + 'Ẫ' => 'ẫ', + 'Ậ' => 'ậ', + 'Ắ' => 'ắ', + 'Ằ' => 'ằ', + 'Ẳ' => 'ẳ', + 'Ẵ' => 'ẵ', + 'Ặ' => 'ặ', + 'Ẹ' => 'ẹ', + 'Ẻ' => 'ẻ', + 'Ẽ' => 'ẽ', + 'Ế' => 'ế', + 'Ề' => 'ề', + 'Ể' => 'ể', + 'Ễ' => 'ễ', + 'Ệ' => 'ệ', + 'Ỉ' => 'ỉ', + 'Ị' => 'ị', + 'Ọ' => 'ọ', + 'Ỏ' => 'ỏ', + 'Ố' => 'ố', + 'Ồ' => 'ồ', + 'Ổ' => 'ổ', + 'Ỗ' => 'ỗ', + 'Ộ' => 'ộ', + 'Ớ' => 'ớ', + 'Ờ' => 'ờ', + 'Ở' => 'ở', + 'Ỡ' => 'ỡ', + 'Ợ' => 'ợ', + 'Ụ' => 'ụ', + 'Ủ' => 'ủ', + 'Ứ' => 'ứ', + 'Ừ' => 'ừ', + 'Ử' => 'ử', + 'Ữ' => 'ữ', + 'Ự' => 'ự', + 'Ỳ' => 'ỳ', + 'Ỵ' => 'ỵ', + 'Ỷ' => 'ỷ', + 'Ỹ' => 'ỹ', + 'Ἀ' => 'ἀ', + 'Ἁ' => 'ἁ', + 'Ἂ' => 'ἂ', 
+ 'Ἃ' => 'ἃ', + 'Ἄ' => 'ἄ', + 'Ἅ' => 'ἅ', + 'Ἆ' => 'ἆ', + 'Ἇ' => 'ἇ', + 'Ἐ' => 'ἐ', + 'Ἑ' => 'ἑ', + 'Ἒ' => 'ἒ', + 'Ἓ' => 'ἓ', + 'Ἔ' => 'ἔ', + 'Ἕ' => 'ἕ', + 'Ἡ' => 'ἡ', + 'Ἢ' => 'ἢ', + 'Ἣ' => 'ἣ', + 'Ἤ' => 'ἤ', + 'Ἥ' => 'ἥ', + 'Ἦ' => 'ἦ', + 'Ἧ' => 'ἧ', + 'Ἰ' => 'ἰ', + 'Ἱ' => 'ἱ', + 'Ἲ' => 'ἲ', + 'Ἳ' => 'ἳ', + 'Ἴ' => 'ἴ', + 'Ἵ' => 'ἵ', + 'Ἶ' => 'ἶ', + 'Ἷ' => 'ἷ', + 'Ὀ' => 'ὀ', + 'Ὁ' => 'ὁ', + 'Ὂ' => 'ὂ', + 'Ὃ' => 'ὃ', + 'Ὄ' => 'ὄ', + 'Ὅ' => 'ὅ', + 'Ὑ' => 'ὑ', + 'Ὓ' => 'ὓ', + 'Ὕ' => 'ὕ', + 'Ὗ' => 'ὗ', + 'Ὡ' => 'ὡ', + 'Ὢ' => 'ὢ', + 'Ὣ' => 'ὣ', + 'Ὤ' => 'ὤ', + 'Ὥ' => 'ὥ', + 'Ὦ' => 'ὦ', + 'Ὧ' => 'ὧ', + 'ᾈ' => 'ᾀ', + 'ᾉ' => 'ᾁ', + 'ᾊ' => 'ᾂ', + 'ᾋ' => 'ᾃ', + 'ᾌ' => 'ᾄ', + 'ᾍ' => 'ᾅ', + 'ᾎ' => 'ᾆ', + 'ᾏ' => 'ᾇ', + 'ᾘ' => 'ᾐ', + 'ᾙ' => 'ᾑ', + 'ᾚ' => 'ᾒ', + 'ᾛ' => 'ᾓ', + 'ᾜ' => 'ᾔ', + 'ᾝ' => 'ᾕ', + 'ᾞ' => 'ᾖ', + 'ᾟ' => 'ᾗ', + 'ᾩ' => 'ᾡ', + 'ᾪ' => 'ᾢ', + 'ᾫ' => 'ᾣ', + 'ᾬ' => 'ᾤ', + 'ᾭ' => 'ᾥ', + 'ᾮ' => 'ᾦ', + 'ᾯ' => 'ᾧ', + 'Ᾰ' => 'ᾰ', + 'Ᾱ' => 'ᾱ', + 'Ὰ' => 'ὰ', + 'ᾼ' => 'ᾳ', + 'Ὲ' => 'ὲ', + 'Ὴ' => 'ὴ', + 'ῌ' => 'ῃ', + 'Ῐ' => 'ῐ', + 'Ῑ' => 'ῑ', + 'Ὶ' => 'ὶ', + 'Ῡ' => 'ῡ', + 'Ὺ' => 'ὺ', + 'Ῥ' => 'ῥ', + 'Ὸ' => 'ὸ', + 'Ὼ' => 'ὼ', + 'ῼ' => 'ῳ', + /* NOTE(review): the keys 'A'..'Z' below repeat the first 26 entries of this table, so they are redundant as written; other characters may have been intended here - verify against the upstream table */ 'A' => 'a', + 'B' => 'b', + 'C' => 'c', + 'D' => 'd', + 'E' => 'e', + 'F' => 'f', + 'G' => 'g', + 'H' => 'h', + 'I' => 'i', + 'J' => 'j', + 'K' => 'k', + 'L' => 'l', + 'M' => 'm', + 'N' => 'n', + 'O' => 'o', + 'P' => 'p', + 'Q' => 'q', + 'R' => 'r', + 'S' => 's', + 'T' => 't', + 'U' => 'u', + 'V' => 'v', + 'W' => 'w', + 'X' => 'x', + 'Y' => 'y', + 'Z' => 'z', +]; diff --git a/platform/www/inc/Utf8/tables/loweraccents.php b/platform/www/inc/Utf8/tables/loweraccents.php new file mode 100644 index 0000000..cc3ec8e --- /dev/null +++ b/platform/www/inc/Utf8/tables/loweraccents.php @@ -0,0 +1,116 @@ +<?php +/** + * UTF-8 lookup table for lower case accented letters + * + * This lookup table defines replacements from the ASCII-7 range for accented + * characters. These are lower case letters only. 
+ * + * @author Andreas Gohr <andi@splitbrain.org> + * @see \dokuwiki\Utf8\Clean::deaccent() + */ +return [ + 'á' => 'a', + 'à' => 'a', + 'ă' => 'a', + 'â' => 'a', + 'å' => 'a', + 'ä' => 'ae', + 'ã' => 'a', + 'ą' => 'a', + 'ā' => 'a', + 'æ' => 'ae', + 'ḃ' => 'b', + 'ć' => 'c', + 'ĉ' => 'c', + 'č' => 'c', + 'ċ' => 'c', + 'ç' => 'c', + 'ď' => 'd', + 'ḋ' => 'd', + 'đ' => 'd', + 'ð' => 'dh', + 'é' => 'e', + 'è' => 'e', + 'ĕ' => 'e', + 'ê' => 'e', + 'ě' => 'e', + 'ë' => 'e', + 'ė' => 'e', + 'ę' => 'e', + 'ē' => 'e', + 'ḟ' => 'f', + 'ƒ' => 'f', + 'ğ' => 'g', + 'ĝ' => 'g', + 'ġ' => 'g', + 'ģ' => 'g', + 'ĥ' => 'h', + 'ħ' => 'h', + 'í' => 'i', + 'ì' => 'i', + 'î' => 'i', + 'ï' => 'i', + 'ĩ' => 'i', + 'į' => 'i', + 'ī' => 'i', + 'ĵ' => 'j', + 'ķ' => 'k', + 'ĺ' => 'l', + 'ľ' => 'l', + 'ļ' => 'l', + 'ł' => 'l', + 'ṁ' => 'm', + 'ń' => 'n', + 'ň' => 'n', + 'ñ' => 'n', + 'ņ' => 'n', + 'ó' => 'o', + 'ò' => 'o', + 'ô' => 'o', + 'ö' => 'oe', + 'ő' => 'o', + 'õ' => 'o', + 'ø' => 'o', + 'ō' => 'o', + 'ơ' => 'o', + 'ṗ' => 'p', + 'ŕ' => 'r', + 'ř' => 'r', + 'ŗ' => 'r', + 'ś' => 's', + 'ŝ' => 's', + 'š' => 's', + 'ṡ' => 's', + 'ş' => 's', + 'ș' => 's', + 'ß' => 'ss', + 'ť' => 't', + 'ṫ' => 't', + 'ţ' => 't', + 'ț' => 't', + 'ŧ' => 't', + 'ú' => 'u', + 'ù' => 'u', + 'ŭ' => 'u', + 'û' => 'u', + 'ů' => 'u', + 'ü' => 'ue', + 'ű' => 'u', + 'ũ' => 'u', + 'ų' => 'u', + 'ū' => 'u', + 'ư' => 'u', + 'ẃ' => 'w', + 'ẁ' => 'w', + 'ŵ' => 'w', + 'ẅ' => 'w', + 'ý' => 'y', + 'ỳ' => 'y', + 'ŷ' => 'y', + 'ÿ' => 'y', + 'ź' => 'z', + 'ž' => 'z', + 'ż' => 'z', + 'þ' => 'th', + 'µ' => 'u', +]; diff --git a/platform/www/inc/Utf8/tables/romanization.php b/platform/www/inc/Utf8/tables/romanization.php new file mode 100644 index 0000000..e757b9c --- /dev/null +++ b/platform/www/inc/Utf8/tables/romanization.php @@ -0,0 +1,1458 @@ +<?php +/** + * Romanization lookup table + * + * This lookup table provides a way to transform strings written in a language + * different from the ones based upon Latin letters into 
plain ASCII. + * + * Please note: this is not a scientific transliteration table. It only works + * oneway from nonlatin to ASCII and it works by simple character replacement + * only. Specialities of each language are not supported. + * + * @todo some keys are used multiple times + * @todo remove or integrate commented pairs + * + * @author Andreas Gohr <andi@splitbrain.org> + * @author Vitaly Blokhin <vitinfo@vitn.com> + * @author Bisqwit <bisqwit@iki.fi> + * @author Arthit Suriyawongkul <arthit@gmail.com> + * @author Denis Scheither <amorphis@uni-bremen.de> + * @author Eivind Morland <eivind.morland@gmail.com> + * @link http://www.uconv.com/translit.htm + * @link http://kanjidict.stc.cx/hiragana.php?src=2 + * @link http://www.translatum.gr/converter/greek-transliteration.htm + * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription + * @link http://www.btranslations.com/resources/romanization/korean.asp + */ +return [ + // scandinavian - differs from what we do in deaccent + 'å' => 'a', + 'Å' => 'A', + 'ä' => 'a', + 'Ä' => 'A', + 'ö' => 'o', + 'Ö' => 'O', + + //russian cyrillic + 'а' => 'a', + 'А' => 'A', + 'б' => 'b', + 'Б' => 'B', + 'в' => 'v', + 'В' => 'V', + 'г' => 'g', + 'Г' => 'G', + 'д' => 'd', + 'Д' => 'D', + 'е' => 'e', + 'Е' => 'E', + 'ё' => 'jo', + 'Ё' => 'Jo', + 'ж' => 'zh', + 'Ж' => 'Zh', + 'з' => 'z', + 'З' => 'Z', + 'и' => 'i', + 'И' => 'I', + 'й' => 'j', + 'Й' => 'J', + 'к' => 'k', + 'К' => 'K', + 'л' => 'l', + 'Л' => 'L', + 'м' => 'm', + 'М' => 'M', + 'н' => 'n', + 'Н' => 'N', + 'о' => 'o', + 'О' => 'O', + 'п' => 'p', + 'П' => 'P', + 'р' => 'r', + 'Р' => 'R', + 'с' => 's', + 'С' => 'S', + 'т' => 't', + 'Т' => 'T', + 'у' => 'u', + 'У' => 'U', + 'ф' => 'f', + 'Ф' => 'F', + 'х' => 'x', + 'Х' => 'X', + 'ц' => 'c', + 'Ц' => 'C', + 'ч' => 'ch', + 'Ч' => 'Ch', + 'ш' => 'sh', + 'Ш' => 'Sh', + 'щ' => 'sch', + 'Щ' => 'Sch', + 'ъ' => '', + 'Ъ' => '', + 'ы' => 'y', + 'Ы' => 'Y', + 'ь' => '', + 'Ь' => '', + 'э' => 'eh', + 'Э' => 'Eh', 
+ 'ю' => 'ju', + 'Ю' => 'Ju', + 'я' => 'ja', + 'Я' => 'Ja', + + // Ukrainian cyrillic + 'Ґ' => 'Gh', + 'ґ' => 'gh', + 'Є' => 'Je', + 'є' => 'je', + 'І' => 'I', + 'і' => 'i', + 'Ї' => 'Ji', + 'ї' => 'ji', + + // Georgian + 'ა' => 'a', + 'ბ' => 'b', + 'გ' => 'g', + 'დ' => 'd', + 'ე' => 'e', + 'ვ' => 'v', + 'ზ' => 'z', + 'თ' => 'th', + 'ი' => 'i', + 'კ' => 'p', + 'ლ' => 'l', + 'მ' => 'm', + 'ნ' => 'n', + 'ო' => 'o', + 'პ' => 'p', + 'ჟ' => 'zh', + 'რ' => 'r', + 'ს' => 's', + 'ტ' => 't', + 'უ' => 'u', + 'ფ' => 'ph', + 'ქ' => 'kh', + 'ღ' => 'gh', + 'ყ' => 'q', + 'შ' => 'sh', + 'ჩ' => 'ch', + 'ც' => 'c', + 'ძ' => 'dh', + 'წ' => 'w', + 'ჭ' => 'j', + 'ხ' => 'x', + 'ჯ' => 'jh', + 'ჰ' => 'xh', + + //Sanskrit + 'अ' => 'a', + 'आ' => 'ah', + 'इ' => 'i', + 'ई' => 'ih', + 'उ' => 'u', + 'ऊ' => 'uh', + 'ऋ' => 'ry', + 'ॠ' => 'ryh', + 'ऌ' => 'ly', + 'ॡ' => 'lyh', + 'ए' => 'e', + 'ऐ' => 'ay', + 'ओ' => 'o', + 'औ' => 'aw', + 'अं' => 'amh', + 'अः' => 'aq', + 'क' => 'k', + 'ख' => 'kh', + 'ग' => 'g', + 'घ' => 'gh', + 'ङ' => 'nh', + 'च' => 'c', + 'छ' => 'ch', + 'ज' => 'j', + 'झ' => 'jh', + 'ञ' => 'ny', + 'ट' => 'tq', + 'ठ' => 'tqh', + 'ड' => 'dq', + 'ढ' => 'dqh', + 'ण' => 'nq', + 'त' => 't', + 'थ' => 'th', + 'द' => 'd', + 'ध' => 'dh', + 'न' => 'n', + 'प' => 'p', + 'फ' => 'ph', + 'ब' => 'b', + 'भ' => 'bh', + 'म' => 'm', + 'य' => 'z', + 'र' => 'r', + 'ल' => 'l', + 'व' => 'v', + 'श' => 'sh', + 'ष' => 'sqh', + 'स' => 's', + 'ह' => 'x', + + //Sanskrit diacritics + 'Ā' => 'A', + 'Ī' => 'I', + 'Ū' => 'U', + 'Ṛ' => 'R', + 'Ṝ' => 'R', + 'Ṅ' => 'N', + 'Ñ' => 'N', + 'Ṭ' => 'T', + 'Ḍ' => 'D', + 'Ṇ' => 'N', + 'Ś' => 'S', + 'Ṣ' => 'S', + 'Ṁ' => 'M', + 'Ṃ' => 'M', + 'Ḥ' => 'H', + 'Ḷ' => 'L', + 'Ḹ' => 'L', + 'ā' => 'a', + 'ī' => 'i', + 'ū' => 'u', + 'ṛ' => 'r', + 'ṝ' => 'r', + 'ṅ' => 'n', + 'ñ' => 'n', + 'ṭ' => 't', + 'ḍ' => 'd', + 'ṇ' => 'n', + 'ś' => 's', + 'ṣ' => 's', + 'ṁ' => 'm', + 'ṃ' => 'm', + 'ḥ' => 'h', + 'ḷ' => 'l', + 'ḹ' => 'l', + + //Hebrew + 'א' => 'a', + 'ב' => 'b', + 'ג' => 'g', + 'ד' => 'd', 
+ 'ה' => 'h', + 'ו' => 'v', + 'ז' => 'z', + 'ח' => 'kh', + 'ט' => 'th', + 'י' => 'y', + 'ך' => 'h', + 'כ' => 'k', + 'ל' => 'l', + 'ם' => 'm', + 'מ' => 'm', + 'ן' => 'n', + 'נ' => 'n', + 'ס' => 's', + 'ע' => 'ah', + 'ף' => 'f', + 'פ' => 'p', + 'ץ' => 'c', + 'צ' => 'c', + 'ק' => 'q', + 'ר' => 'r', + 'ש' => 'sh', + 'ת' => 't', + + //Arabic + 'ا' => 'a', + 'ب' => 'b', + 'ت' => 't', + 'ث' => 'th', + 'ج' => 'g', + 'ح' => 'xh', + 'خ' => 'x', + 'د' => 'd', + 'ذ' => 'dh', + 'ر' => 'r', + 'ز' => 'z', + 'س' => 's', + 'ش' => 'sh', + 'ص' => 's\'', + 'ض' => 'd\'', + 'ط' => 't\'', + 'ظ' => 'z\'', + 'ع' => 'y', + 'غ' => 'gh', + 'ف' => 'f', + 'ق' => 'q', + 'ك' => 'k', + 'ل' => 'l', + 'م' => 'm', + 'ن' => 'n', + 'ه' => 'x\'', + 'و' => 'u', + 'ي' => 'i', + + // Japanese characters (last update: 2008-05-09) + + // Japanese hiragana + + // 3 character syllables, っ doubles the consonant after + 'っちゃ' => 'ccha', + 'っちぇ' => 'cche', + 'っちょ' => 'ccho', + 'っちゅ' => 'cchu', + 'っびゃ' => 'bbya', + 'っびぇ' => 'bbye', + 'っびぃ' => 'bbyi', + 'っびょ' => 'bbyo', + 'っびゅ' => 'bbyu', + 'っぴゃ' => 'ppya', + 'っぴぇ' => 'ppye', + 'っぴぃ' => 'ppyi', + 'っぴょ' => 'ppyo', + 'っぴゅ' => 'ppyu', + 'っちゃ' => 'ccha', + 'っちぇ' => 'cche', + 'っち' => 'cchi', + 'っちょ' => 'ccho', + 'っちゅ' => 'cchu', + // 'っひゃ'=>'hya', + // 'っひぇ'=>'hye', + // 'っひぃ'=>'hyi', + // 'っひょ'=>'hyo', + // 'っひゅ'=>'hyu', + 'っきゃ' => 'kkya', + 'っきぇ' => 'kkye', + 'っきぃ' => 'kkyi', + 'っきょ' => 'kkyo', + 'っきゅ' => 'kkyu', + 'っぎゃ' => 'ggya', + 'っぎぇ' => 'ggye', + 'っぎぃ' => 'ggyi', + 'っぎょ' => 'ggyo', + 'っぎゅ' => 'ggyu', + 'っみゃ' => 'mmya', + 'っみぇ' => 'mmye', + 'っみぃ' => 'mmyi', + 'っみょ' => 'mmyo', + 'っみゅ' => 'mmyu', + 'っにゃ' => 'nnya', + 'っにぇ' => 'nnye', + 'っにぃ' => 'nnyi', + 'っにょ' => 'nnyo', + 'っにゅ' => 'nnyu', + 'っりゃ' => 'rrya', + 'っりぇ' => 'rrye', + 'っりぃ' => 'rryi', + 'っりょ' => 'rryo', + 'っりゅ' => 'rryu', + 'っしゃ' => 'ssha', + 'っしぇ' => 'sshe', + 'っし' => 'sshi', + 'っしょ' => 'ssho', + 'っしゅ' => 'sshu', + + // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but 
the + // apostrophe would be converted to _ anyway) + 'んあ' => 'n_a', + 'んえ' => 'n_e', + 'んい' => 'n_i', + 'んお' => 'n_o', + 'んう' => 'n_u', + 'んや' => 'n_ya', + 'んよ' => 'n_yo', + 'んゆ' => 'n_yu', + + // 2 character syllables - normal + 'ふぁ' => 'fa', + 'ふぇ' => 'fe', + 'ふぃ' => 'fi', + 'ふぉ' => 'fo', + 'ちゃ' => 'cha', + 'ちぇ' => 'che', + 'ち' => 'chi', + 'ちょ' => 'cho', + 'ちゅ' => 'chu', + 'ひゃ' => 'hya', + 'ひぇ' => 'hye', + 'ひぃ' => 'hyi', + 'ひょ' => 'hyo', + 'ひゅ' => 'hyu', + 'びゃ' => 'bya', + 'びぇ' => 'bye', + 'びぃ' => 'byi', + 'びょ' => 'byo', + 'びゅ' => 'byu', + 'ぴゃ' => 'pya', + 'ぴぇ' => 'pye', + 'ぴぃ' => 'pyi', + 'ぴょ' => 'pyo', + 'ぴゅ' => 'pyu', + 'きゃ' => 'kya', + 'きぇ' => 'kye', + 'きぃ' => 'kyi', + 'きょ' => 'kyo', + 'きゅ' => 'kyu', + 'ぎゃ' => 'gya', + 'ぎぇ' => 'gye', + 'ぎぃ' => 'gyi', + 'ぎょ' => 'gyo', + 'ぎゅ' => 'gyu', + 'みゃ' => 'mya', + 'みぇ' => 'mye', + 'みぃ' => 'myi', + 'みょ' => 'myo', + 'みゅ' => 'myu', + 'にゃ' => 'nya', + 'にぇ' => 'nye', + 'にぃ' => 'nyi', + 'にょ' => 'nyo', + 'にゅ' => 'nyu', + 'りゃ' => 'rya', + 'りぇ' => 'rye', + 'りぃ' => 'ryi', + 'りょ' => 'ryo', + 'りゅ' => 'ryu', + 'しゃ' => 'sha', + 'しぇ' => 'she', + 'し' => 'shi', + 'しょ' => 'sho', + 'しゅ' => 'shu', + 'じゃ' => 'ja', + 'じぇ' => 'je', + 'じょ' => 'jo', + 'じゅ' => 'ju', + 'うぇ' => 'we', + 'うぃ' => 'wi', + 'いぇ' => 'ye', + + // 2 character syllables, っ doubles the consonant after + 'っば' => 'bba', + 'っべ' => 'bbe', + 'っび' => 'bbi', + 'っぼ' => 'bbo', + 'っぶ' => 'bbu', + 'っぱ' => 'ppa', + 'っぺ' => 'ppe', + 'っぴ' => 'ppi', + 'っぽ' => 'ppo', + 'っぷ' => 'ppu', + 'った' => 'tta', + 'って' => 'tte', + 'っち' => 'cchi', + 'っと' => 'tto', + 'っつ' => 'ttsu', + 'っだ' => 'dda', + 'っで' => 'dde', + 'っぢ' => 'ddi', + 'っど' => 'ddo', + 'っづ' => 'ddu', + 'っが' => 'gga', + 'っげ' => 'gge', + 'っぎ' => 'ggi', + 'っご' => 'ggo', + 'っぐ' => 'ggu', + 'っか' => 'kka', + 'っけ' => 'kke', + 'っき' => 'kki', + 'っこ' => 'kko', + 'っく' => 'kku', + 'っま' => 'mma', + 'っめ' => 'mme', + 'っみ' => 'mmi', + 'っも' => 'mmo', + 'っむ' => 'mmu', + 'っな' => 'nna', + 'っね' => 'nne', + 'っに' => 'nni', + 'っの' => 'nno', + 'っぬ' => 'nnu', + 
'っら' => 'rra', + 'っれ' => 'rre', + 'っり' => 'rri', + 'っろ' => 'rro', + 'っる' => 'rru', + 'っさ' => 'ssa', + 'っせ' => 'sse', + 'っし' => 'sshi', + 'っそ' => 'sso', + 'っす' => 'ssu', + 'っざ' => 'zza', + 'っぜ' => 'zze', + 'っじ' => 'jji', + 'っぞ' => 'zzo', + 'っず' => 'zzu', + + // 1 character syllabels + 'あ' => 'a', + 'え' => 'e', + 'い' => 'i', + 'お' => 'o', + 'う' => 'u', + 'ん' => 'n', + 'は' => 'ha', + 'へ' => 'he', + 'ひ' => 'hi', + 'ほ' => 'ho', + 'ふ' => 'fu', + 'ば' => 'ba', + 'べ' => 'be', + 'び' => 'bi', + 'ぼ' => 'bo', + 'ぶ' => 'bu', + 'ぱ' => 'pa', + 'ぺ' => 'pe', + 'ぴ' => 'pi', + 'ぽ' => 'po', + 'ぷ' => 'pu', + 'た' => 'ta', + 'て' => 'te', + 'ち' => 'chi', + 'と' => 'to', + 'つ' => 'tsu', + 'だ' => 'da', + 'で' => 'de', + 'ぢ' => 'di', + 'ど' => 'do', + 'づ' => 'du', + 'が' => 'ga', + 'げ' => 'ge', + 'ぎ' => 'gi', + 'ご' => 'go', + 'ぐ' => 'gu', + 'か' => 'ka', + 'け' => 'ke', + 'き' => 'ki', + 'こ' => 'ko', + 'く' => 'ku', + 'ま' => 'ma', + 'め' => 'me', + 'み' => 'mi', + 'も' => 'mo', + 'む' => 'mu', + 'な' => 'na', + 'ね' => 'ne', + 'に' => 'ni', + 'の' => 'no', + 'ぬ' => 'nu', + 'ら' => 'ra', + 'れ' => 're', + 'り' => 'ri', + 'ろ' => 'ro', + 'る' => 'ru', + 'さ' => 'sa', + 'せ' => 'se', + 'し' => 'shi', + 'そ' => 'so', + 'す' => 'su', + 'わ' => 'wa', + 'を' => 'wo', + 'ざ' => 'za', + 'ぜ' => 'ze', + 'じ' => 'ji', + 'ぞ' => 'zo', + 'ず' => 'zu', + 'や' => 'ya', + 'よ' => 'yo', + 'ゆ' => 'yu', + // old characters + 'ゑ' => 'we', + 'ゐ' => 'wi', + + // convert what's left (probably only kicks in when something's missing above) + // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', + // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', + + // never seen one of those (disabled for the moment) + // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', + // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', + // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', + // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', + // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', + // 
'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', + // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', + // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', + // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', + // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', + // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', + // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', + // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', + // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', + + // 'spare' characters from other romanization systems + // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', + // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', + // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', + // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', + //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', + //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', + //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', + //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', + //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', + //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', + + + // Japanese katakana + + // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before + // (usualy written with macron, but we don't want that in our URLs) + 'ッビャー' => 'bbyaa', + 'ッビェー' => 'bbyee', + 'ッビィー' => 'bbyii', + 'ッビョー' => 'bbyoo', + 'ッビュー' => 'bbyuu', + 'ッピャー' => 'ppyaa', + 'ッピェー' => 'ppyee', + 'ッピィー' => 'ppyii', + 'ッピョー' => 'ppyoo', + 'ッピュー' => 'ppyuu', + 'ッキャー' => 'kkyaa', + 'ッキェー' => 'kkyee', + 'ッキィー' => 'kkyii', + 'ッキョー' => 'kkyoo', + 'ッキュー' => 'kkyuu', + 'ッギャー' => 'ggyaa', + 'ッギェー' => 'ggyee', + 'ッギィー' => 'ggyii', + 'ッギョー' => 'ggyoo', + 'ッギュー' => 'ggyuu', + 'ッミャー' => 'mmyaa', + 'ッミェー' => 'mmyee', + 'ッミィー' => 'mmyii', + 'ッミョー' => 'mmyoo', + 'ッミュー' => 'mmyuu', + 'ッニャー' => 'nnyaa', + 'ッニェー' => 'nnyee', + 
'ッニィー' => 'nnyii', + 'ッニョー' => 'nnyoo', + 'ッニュー' => 'nnyuu', + 'ッリャー' => 'rryaa', + 'ッリェー' => 'rryee', + 'ッリィー' => 'rryii', + 'ッリョー' => 'rryoo', + 'ッリュー' => 'rryuu', + 'ッシャー' => 'sshaa', + 'ッシェー' => 'sshee', + 'ッシー' => 'sshii', + 'ッショー' => 'sshoo', + 'ッシュー' => 'sshuu', + 'ッチャー' => 'cchaa', + 'ッチェー' => 'cchee', + 'ッチー' => 'cchii', + 'ッチョー' => 'cchoo', + 'ッチュー' => 'cchuu', + 'ッティー' => 'ttii', + 'ッヂィー' => 'ddii', + + // 3 character syllables - doubled vowels + 'ファー' => 'faa', + 'フェー' => 'fee', + 'フィー' => 'fii', + 'フォー' => 'foo', + 'フャー' => 'fyaa', + 'フェー' => 'fyee', + 'フィー' => 'fyii', + 'フョー' => 'fyoo', + 'フュー' => 'fyuu', + 'ヒャー' => 'hyaa', + 'ヒェー' => 'hyee', + 'ヒィー' => 'hyii', + 'ヒョー' => 'hyoo', + 'ヒュー' => 'hyuu', + 'ビャー' => 'byaa', + 'ビェー' => 'byee', + 'ビィー' => 'byii', + 'ビョー' => 'byoo', + 'ビュー' => 'byuu', + 'ピャー' => 'pyaa', + 'ピェー' => 'pyee', + 'ピィー' => 'pyii', + 'ピョー' => 'pyoo', + 'ピュー' => 'pyuu', + 'キャー' => 'kyaa', + 'キェー' => 'kyee', + 'キィー' => 'kyii', + 'キョー' => 'kyoo', + 'キュー' => 'kyuu', + 'ギャー' => 'gyaa', + 'ギェー' => 'gyee', + 'ギィー' => 'gyii', + 'ギョー' => 'gyoo', + 'ギュー' => 'gyuu', + 'ミャー' => 'myaa', + 'ミェー' => 'myee', + 'ミィー' => 'myii', + 'ミョー' => 'myoo', + 'ミュー' => 'myuu', + 'ニャー' => 'nyaa', + 'ニェー' => 'nyee', + 'ニィー' => 'nyii', + 'ニョー' => 'nyoo', + 'ニュー' => 'nyuu', + 'リャー' => 'ryaa', + 'リェー' => 'ryee', + 'リィー' => 'ryii', + 'リョー' => 'ryoo', + 'リュー' => 'ryuu', + 'シャー' => 'shaa', + 'シェー' => 'shee', + 'シー' => 'shii', + 'ショー' => 'shoo', + 'シュー' => 'shuu', + 'ジャー' => 'jaa', + 'ジェー' => 'jee', + 'ジー' => 'jii', + 'ジョー' => 'joo', + 'ジュー' => 'juu', + 'スァー' => 'swaa', + 'スェー' => 'swee', + 'スィー' => 'swii', + 'スォー' => 'swoo', + 'スゥー' => 'swuu', + 'デァー' => 'daa', + 'デェー' => 'dee', + 'ディー' => 'dii', + 'デォー' => 'doo', + 'デゥー' => 'duu', + 'チャー' => 'chaa', + 'チェー' => 'chee', + 'チー' => 'chii', + 'チョー' => 'choo', + 'チュー' => 'chuu', + 'ヂャー' => 'dyaa', + 'ヂェー' => 'dyee', + 'ヂィー' => 'dyii', + 'ヂョー' => 'dyoo', + 'ヂュー' => 'dyuu', + 'ツャー' => 'tsaa', + 'ツェー' => 'tsee', + 'ツィー' => 
'tsii', + 'ツョー' => 'tsoo', + 'ツー' => 'tsuu', + 'トァー' => 'twaa', + 'トェー' => 'twee', + 'トィー' => 'twii', + 'トォー' => 'twoo', + 'トゥー' => 'twuu', + 'ドァー' => 'dwaa', + 'ドェー' => 'dwee', + 'ドィー' => 'dwii', + 'ドォー' => 'dwoo', + 'ドゥー' => 'dwuu', + 'ウァー' => 'whaa', + 'ウェー' => 'whee', + 'ウィー' => 'whii', + 'ウォー' => 'whoo', + 'ウゥー' => 'whuu', + 'ヴャー' => 'vyaa', + 'ヴェー' => 'vyee', + 'ヴィー' => 'vyii', + 'ヴョー' => 'vyoo', + 'ヴュー' => 'vyuu', + 'ヴァー' => 'vaa', + 'ヴェー' => 'vee', + 'ヴィー' => 'vii', + 'ヴォー' => 'voo', + 'ヴー' => 'vuu', + 'ウェー' => 'wee', + 'ウィー' => 'wii', + 'イェー' => 'yee', + 'ティー' => 'tii', + 'ヂィー' => 'dii', + + // 3 character syllables - doubled consonants + 'ッビャ' => 'bbya', + 'ッビェ' => 'bbye', + 'ッビィ' => 'bbyi', + 'ッビョ' => 'bbyo', + 'ッビュ' => 'bbyu', + 'ッピャ' => 'ppya', + 'ッピェ' => 'ppye', + 'ッピィ' => 'ppyi', + 'ッピョ' => 'ppyo', + 'ッピュ' => 'ppyu', + 'ッキャ' => 'kkya', + 'ッキェ' => 'kkye', + 'ッキィ' => 'kkyi', + 'ッキョ' => 'kkyo', + 'ッキュ' => 'kkyu', + 'ッギャ' => 'ggya', + 'ッギェ' => 'ggye', + 'ッギィ' => 'ggyi', + 'ッギョ' => 'ggyo', + 'ッギュ' => 'ggyu', + 'ッミャ' => 'mmya', + 'ッミェ' => 'mmye', + 'ッミィ' => 'mmyi', + 'ッミョ' => 'mmyo', + 'ッミュ' => 'mmyu', + 'ッニャ' => 'nnya', + 'ッニェ' => 'nnye', + 'ッニィ' => 'nnyi', + 'ッニョ' => 'nnyo', + 'ッニュ' => 'nnyu', + 'ッリャ' => 'rrya', + 'ッリェ' => 'rrye', + 'ッリィ' => 'rryi', + 'ッリョ' => 'rryo', + 'ッリュ' => 'rryu', + 'ッシャ' => 'ssha', + 'ッシェ' => 'sshe', + 'ッシ' => 'sshi', + 'ッショ' => 'ssho', + 'ッシュ' => 'sshu', + 'ッチャ' => 'ccha', + 'ッチェ' => 'cche', + 'ッチ' => 'cchi', + 'ッチョ' => 'ccho', + 'ッチュ' => 'cchu', + 'ッティ' => 'tti', + 'ッヂィ' => 'ddi', + + // 3 character syllables - doubled vowel and consonants + 'ッバー' => 'bbaa', + 'ッベー' => 'bbee', + 'ッビー' => 'bbii', + 'ッボー' => 'bboo', + 'ッブー' => 'bbuu', + 'ッパー' => 'ppaa', + 'ッペー' => 'ppee', + 'ッピー' => 'ppii', + 'ッポー' => 'ppoo', + 'ップー' => 'ppuu', + 'ッケー' => 'kkee', + 'ッキー' => 'kkii', + 'ッコー' => 'kkoo', + 'ックー' => 'kkuu', + 'ッカー' => 'kkaa', + 'ッガー' => 'ggaa', + 'ッゲー' => 'ggee', + 'ッギー' => 'ggii', + 'ッゴー' => 'ggoo', + 'ッグー' => 'gguu', + 'ッマー' => 
'maa', + 'ッメー' => 'mee', + 'ッミー' => 'mii', + 'ッモー' => 'moo', + 'ッムー' => 'muu', + 'ッナー' => 'nnaa', + 'ッネー' => 'nnee', + 'ッニー' => 'nnii', + 'ッノー' => 'nnoo', + 'ッヌー' => 'nnuu', + 'ッラー' => 'rraa', + 'ッレー' => 'rree', + 'ッリー' => 'rrii', + 'ッロー' => 'rroo', + 'ッルー' => 'rruu', + 'ッサー' => 'ssaa', + 'ッセー' => 'ssee', + 'ッシー' => 'sshii', + 'ッソー' => 'ssoo', + 'ッスー' => 'ssuu', + 'ッザー' => 'zzaa', + 'ッゼー' => 'zzee', + 'ッジー' => 'jjii', + 'ッゾー' => 'zzoo', + 'ッズー' => 'zzuu', + 'ッター' => 'ttaa', + 'ッテー' => 'ttee', + 'ッチー' => 'chii', + 'ットー' => 'ttoo', + 'ッツー' => 'ttsuu', + 'ッダー' => 'ddaa', + 'ッデー' => 'ddee', + 'ッヂー' => 'ddii', + 'ッドー' => 'ddoo', + 'ッヅー' => 'dduu', + + // 2 character syllables - normal + 'ファ' => 'fa', + 'フェ' => 'fe', + 'フィ' => 'fi', + 'フォ' => 'fo', + 'フゥ' => 'fu', + // 'フャ'=>'fya', + // 'フェ'=>'fye', + // 'フィ'=>'fyi', + // 'フョ'=>'fyo', + // 'フュ'=>'fyu', + 'フャ' => 'fa', + 'フェ' => 'fe', + 'フィ' => 'fi', + 'フョ' => 'fo', + 'フュ' => 'fu', + 'ヒャ' => 'hya', + 'ヒェ' => 'hye', + 'ヒィ' => 'hyi', + 'ヒョ' => 'hyo', + 'ヒュ' => 'hyu', + 'ビャ' => 'bya', + 'ビェ' => 'bye', + 'ビィ' => 'byi', + 'ビョ' => 'byo', + 'ビュ' => 'byu', + 'ピャ' => 'pya', + 'ピェ' => 'pye', + 'ピィ' => 'pyi', + 'ピョ' => 'pyo', + 'ピュ' => 'pyu', + 'キャ' => 'kya', + 'キェ' => 'kye', + 'キィ' => 'kyi', + 'キョ' => 'kyo', + 'キュ' => 'kyu', + 'ギャ' => 'gya', + 'ギェ' => 'gye', + 'ギィ' => 'gyi', + 'ギョ' => 'gyo', + 'ギュ' => 'gyu', + 'ミャ' => 'mya', + 'ミェ' => 'mye', + 'ミィ' => 'myi', + 'ミョ' => 'myo', + 'ミュ' => 'myu', + 'ニャ' => 'nya', + 'ニェ' => 'nye', + 'ニィ' => 'nyi', + 'ニョ' => 'nyo', + 'ニュ' => 'nyu', + 'リャ' => 'rya', + 'リェ' => 'rye', + 'リィ' => 'ryi', + 'リョ' => 'ryo', + 'リュ' => 'ryu', + 'シャ' => 'sha', + 'シェ' => 'she', + 'ショ' => 'sho', + 'シュ' => 'shu', + 'ジャ' => 'ja', + 'ジェ' => 'je', + 'ジョ' => 'jo', + 'ジュ' => 'ju', + 'スァ' => 'swa', + 'スェ' => 'swe', + 'スィ' => 'swi', + 'スォ' => 'swo', + 'スゥ' => 'swu', + 'デァ' => 'da', + 'デェ' => 'de', + 'ディ' => 'di', + 'デォ' => 'do', + 'デゥ' => 'du', + 'チャ' => 'cha', + 'チェ' => 'che', + 'チ' => 'chi', + 'チョ' => 'cho', + 'チュ' => 'chu', 
+ // 'ヂャ'=>'dya', + // 'ヂェ'=>'dye', + // 'ヂィ'=>'dyi', + // 'ヂョ'=>'dyo', + // 'ヂュ'=>'dyu', + 'ツャ' => 'tsa', + 'ツェ' => 'tse', + 'ツィ' => 'tsi', + 'ツョ' => 'tso', + 'ツ' => 'tsu', + 'トァ' => 'twa', + 'トェ' => 'twe', + 'トィ' => 'twi', + 'トォ' => 'two', + 'トゥ' => 'twu', + 'ドァ' => 'dwa', + 'ドェ' => 'dwe', + 'ドィ' => 'dwi', + 'ドォ' => 'dwo', + 'ドゥ' => 'dwu', + 'ウァ' => 'wha', + 'ウェ' => 'whe', + 'ウィ' => 'whi', + 'ウォ' => 'who', + 'ウゥ' => 'whu', + 'ヴャ' => 'vya', + 'ヴェ' => 'vye', + 'ヴィ' => 'vyi', + 'ヴョ' => 'vyo', + 'ヴュ' => 'vyu', + 'ヴァ' => 'va', + 'ヴェ' => 've', + 'ヴィ' => 'vi', + 'ヴォ' => 'vo', + 'ヴ' => 'vu', + 'ウェ' => 'we', + 'ウィ' => 'wi', + 'イェ' => 'ye', + 'ティ' => 'ti', + 'ヂィ' => 'di', + + // 2 character syllables - doubled vocal + 'アー' => 'aa', + 'エー' => 'ee', + 'イー' => 'ii', + 'オー' => 'oo', + 'ウー' => 'uu', + 'ダー' => 'daa', + 'デー' => 'dee', + 'ヂー' => 'dii', + 'ドー' => 'doo', + 'ヅー' => 'duu', + 'ハー' => 'haa', + 'ヘー' => 'hee', + 'ヒー' => 'hii', + 'ホー' => 'hoo', + 'フー' => 'fuu', + 'バー' => 'baa', + 'ベー' => 'bee', + 'ビー' => 'bii', + 'ボー' => 'boo', + 'ブー' => 'buu', + 'パー' => 'paa', + 'ペー' => 'pee', + 'ピー' => 'pii', + 'ポー' => 'poo', + 'プー' => 'puu', + 'ケー' => 'kee', + 'キー' => 'kii', + 'コー' => 'koo', + 'クー' => 'kuu', + 'カー' => 'kaa', + 'ガー' => 'gaa', + 'ゲー' => 'gee', + 'ギー' => 'gii', + 'ゴー' => 'goo', + 'グー' => 'guu', + 'マー' => 'maa', + 'メー' => 'mee', + 'ミー' => 'mii', + 'モー' => 'moo', + 'ムー' => 'muu', + 'ナー' => 'naa', + 'ネー' => 'nee', + 'ニー' => 'nii', + 'ノー' => 'noo', + 'ヌー' => 'nuu', + 'ラー' => 'raa', + 'レー' => 'ree', + 'リー' => 'rii', + 'ロー' => 'roo', + 'ルー' => 'ruu', + 'サー' => 'saa', + 'セー' => 'see', + 'シー' => 'shii', + 'ソー' => 'soo', + 'スー' => 'suu', + 'ザー' => 'zaa', + 'ゼー' => 'zee', + 'ジー' => 'jii', + 'ゾー' => 'zoo', + 'ズー' => 'zuu', + 'ター' => 'taa', + 'テー' => 'tee', + 'チー' => 'chii', + 'トー' => 'too', + 'ツー' => 'tsuu', + 'ワー' => 'waa', + 'ヲー' => 'woo', + 'ヤー' => 'yaa', + 'ヨー' => 'yoo', + 'ユー' => 'yuu', + 'ヵー' => 'kaa', + 'ヶー' => 'kee', + // old characters + 'ヱー' => 'wee', + 'ヰー' => 'wii', + + 
// seperate katakana 'n' + 'ンア' => 'n_a', + 'ンエ' => 'n_e', + 'ンイ' => 'n_i', + 'ンオ' => 'n_o', + 'ンウ' => 'n_u', + 'ンヤ' => 'n_ya', + 'ンヨ' => 'n_yo', + 'ンユ' => 'n_yu', + + // 2 character syllables - doubled consonants + 'ッバ' => 'bba', + 'ッベ' => 'bbe', + 'ッビ' => 'bbi', + 'ッボ' => 'bbo', + 'ッブ' => 'bbu', + 'ッパ' => 'ppa', + 'ッペ' => 'ppe', + 'ッピ' => 'ppi', + 'ッポ' => 'ppo', + 'ップ' => 'ppu', + 'ッケ' => 'kke', + 'ッキ' => 'kki', + 'ッコ' => 'kko', + 'ック' => 'kku', + 'ッカ' => 'kka', + 'ッガ' => 'gga', + 'ッゲ' => 'gge', + 'ッギ' => 'ggi', + 'ッゴ' => 'ggo', + 'ッグ' => 'ggu', + 'ッマ' => 'ma', + 'ッメ' => 'me', + 'ッミ' => 'mi', + 'ッモ' => 'mo', + 'ッム' => 'mu', + 'ッナ' => 'nna', + 'ッネ' => 'nne', + 'ッニ' => 'nni', + 'ッノ' => 'nno', + 'ッヌ' => 'nnu', + 'ッラ' => 'rra', + 'ッレ' => 'rre', + 'ッリ' => 'rri', + 'ッロ' => 'rro', + 'ッル' => 'rru', + 'ッサ' => 'ssa', + 'ッセ' => 'sse', + 'ッシ' => 'sshi', + 'ッソ' => 'sso', + 'ッス' => 'ssu', + 'ッザ' => 'zza', + 'ッゼ' => 'zze', + 'ッジ' => 'jji', + 'ッゾ' => 'zzo', + 'ッズ' => 'zzu', + 'ッタ' => 'tta', + 'ッテ' => 'tte', + 'ッチ' => 'cchi', + 'ット' => 'tto', + 'ッツ' => 'ttsu', + 'ッダ' => 'dda', + 'ッデ' => 'dde', + 'ッヂ' => 'ddi', + 'ッド' => 'ddo', + 'ッヅ' => 'ddu', + + // 1 character syllables + 'ア' => 'a', + 'エ' => 'e', + 'イ' => 'i', + 'オ' => 'o', + 'ウ' => 'u', + 'ン' => 'n', + 'ハ' => 'ha', + 'ヘ' => 'he', + 'ヒ' => 'hi', + 'ホ' => 'ho', + 'フ' => 'fu', + 'バ' => 'ba', + 'ベ' => 'be', + 'ビ' => 'bi', + 'ボ' => 'bo', + 'ブ' => 'bu', + 'パ' => 'pa', + 'ペ' => 'pe', + 'ピ' => 'pi', + 'ポ' => 'po', + 'プ' => 'pu', + 'ケ' => 'ke', + 'キ' => 'ki', + 'コ' => 'ko', + 'ク' => 'ku', + 'カ' => 'ka', + 'ガ' => 'ga', + 'ゲ' => 'ge', + 'ギ' => 'gi', + 'ゴ' => 'go', + 'グ' => 'gu', + 'マ' => 'ma', + 'メ' => 'me', + 'ミ' => 'mi', + 'モ' => 'mo', + 'ム' => 'mu', + 'ナ' => 'na', + 'ネ' => 'ne', + 'ニ' => 'ni', + 'ノ' => 'no', + 'ヌ' => 'nu', + 'ラ' => 'ra', + 'レ' => 're', + 'リ' => 'ri', + 'ロ' => 'ro', + 'ル' => 'ru', + 'サ' => 'sa', + 'セ' => 'se', + 'シ' => 'shi', + 'ソ' => 'so', + 'ス' => 'su', + 'ザ' => 'za', + 'ゼ' => 'ze', + 'ジ' => 'ji', + 'ゾ' => 'zo', + 
'ズ' => 'zu', + 'タ' => 'ta', + 'テ' => 'te', + 'チ' => 'chi', + 'ト' => 'to', + 'ツ' => 'tsu', + 'ダ' => 'da', + 'デ' => 'de', + 'ヂ' => 'di', + 'ド' => 'do', + 'ヅ' => 'du', + 'ワ' => 'wa', + 'ヲ' => 'wo', + 'ヤ' => 'ya', + 'ヨ' => 'yo', + 'ユ' => 'yu', + 'ヵ' => 'ka', + 'ヶ' => 'ke', + // old characters + 'ヱ' => 'we', + 'ヰ' => 'wi', + + // convert what's left (probably only kicks in when something's missing above) + 'ァ' => 'a', + 'ェ' => 'e', + 'ィ' => 'i', + 'ォ' => 'o', + 'ゥ' => 'u', + 'ャ' => 'ya', + 'ョ' => 'yo', + 'ュ' => 'yu', + + // special characters + '・' => '_', + '、' => '_', + 'ー' => '_', + // when used with hiragana (seldom), this character would not be converted otherwise + + // 'ラ'=>'la', + // 'レ'=>'le', + // 'リ'=>'li', + // 'ロ'=>'lo', + // 'ル'=>'lu', + // 'チャ'=>'cya', + // 'チェ'=>'cye', + // 'チィ'=>'cyi', + // 'チョ'=>'cyo', + // 'チュ'=>'cyu', + // 'デャ'=>'dha', + // 'デェ'=>'dhe', + // 'ディ'=>'dhi', + // 'デョ'=>'dho', + // 'デュ'=>'dhu', + // 'リャ'=>'lya', + // 'リェ'=>'lye', + // 'リィ'=>'lyi', + // 'リョ'=>'lyo', + // 'リュ'=>'lyu', + // 'テャ'=>'tha', + // 'テェ'=>'the', + // 'ティ'=>'thi', + // 'テョ'=>'tho', + // 'テュ'=>'thu', + // 'ファ'=>'fwa', + // 'フェ'=>'fwe', + // 'フィ'=>'fwi', + // 'フォ'=>'fwo', + // 'フゥ'=>'fwu', + // 'チャ'=>'tya', + // 'チェ'=>'tye', + // 'チィ'=>'tyi', + // 'チョ'=>'tyo', + // 'チュ'=>'tyu', + // 'ジャ'=>'jya', + // 'ジェ'=>'jye', + // 'ジィ'=>'jyi', + // 'ジョ'=>'jyo', + // 'ジュ'=>'jyu', + // 'ジャ'=>'zha', + // 'ジェ'=>'zhe', + // 'ジィ'=>'zhi', + // 'ジョ'=>'zho', + // 'ジュ'=>'zhu', + // 'ジャ'=>'zya', + // 'ジェ'=>'zye', + // 'ジィ'=>'zyi', + // 'ジョ'=>'zyo', + // 'ジュ'=>'zyu', + // 'シャ'=>'sya', + // 'シェ'=>'sye', + // 'シィ'=>'syi', + // 'ショ'=>'syo', + // 'シュ'=>'syu', + // 'シ'=>'ci', + // 'フ'=>'hu', + // 'シ'=>'si', + // 'チ'=>'ti', + // 'ツ'=>'tu', + // 'イ'=>'yi', + // 'ヂ'=>'dzi', + + // "Greeklish" + 'Γ' => 'G', + 'Δ' => 'E', + 'Θ' => 'Th', + 'Λ' => 'L', + 'Ξ' => 'X', + 'Π' => 'P', + 'Σ' => 'S', + 'Φ' => 'F', + 'Ψ' => 'Ps', + 'γ' => 'g', + 'δ' => 'e', + 'θ' => 'th', + 'λ' => 'l', + 'ξ' => 'x', + 'π' => 'p', 
+ 'σ' => 's', + 'φ' => 'f', + 'ψ' => 'ps', + + // Thai + 'ก' => 'k', + 'ข' => 'kh', + 'ฃ' => 'kh', + 'ค' => 'kh', + 'ฅ' => 'kh', + 'ฆ' => 'kh', + 'ง' => 'ng', + 'จ' => 'ch', + 'ฉ' => 'ch', + 'ช' => 'ch', + 'ซ' => 's', + 'ฌ' => 'ch', + 'ญ' => 'y', + 'ฎ' => 'd', + 'ฏ' => 't', + 'ฐ' => 'th', + 'ฑ' => 'd', + 'ฒ' => 'th', + 'ณ' => 'n', + 'ด' => 'd', + 'ต' => 't', + 'ถ' => 'th', + 'ท' => 'th', + 'ธ' => 'th', + 'น' => 'n', + 'บ' => 'b', + 'ป' => 'p', + 'ผ' => 'ph', + 'ฝ' => 'f', + 'พ' => 'ph', + 'ฟ' => 'f', + 'ภ' => 'ph', + 'ม' => 'm', + 'ย' => 'y', + 'ร' => 'r', + 'ฤ' => 'rue', + 'ฤๅ' => 'rue', + 'ล' => 'l', + 'ฦ' => 'lue', + 'ฦๅ' => 'lue', + 'ว' => 'w', + 'ศ' => 's', + 'ษ' => 's', + 'ส' => 's', + 'ห' => 'h', + 'ฬ' => 'l', + 'ฮ' => 'h', + 'ะ' => 'a', + 'ั' => 'a', + 'รร' => 'a', + 'า' => 'a', + 'ๅ' => 'a', + 'ำ' => 'am', + 'ํา' => 'am', + 'ิ' => 'i', + 'ี' => 'i', + 'ึ' => 'ue', + 'ี' => 'ue', + 'ุ' => 'u', + 'ู' => 'u', + 'เ' => 'e', + 'แ' => 'ae', + 'โ' => 'o', + 'อ' => 'o', + 'ียะ' => 'ia', + 'ีย' => 'ia', + 'ือะ' => 'uea', + 'ือ' => 'uea', + 'ัวะ' => 'ua', + 'ัว' => 'ua', + 'ใ' => 'ai', + 'ไ' => 'ai', + 'ัย' => 'ai', + 'าย' => 'ai', + 'าว' => 'ao', + 'ุย' => 'ui', + 'อย' => 'oi', + 'ือย' => 'ueai', + 'วย' => 'uai', + 'ิว' => 'io', + '็ว' => 'eo', + 'ียว' => 'iao', + '่' => '', + '้' => '', + '๊' => '', + '๋' => '', + '็' => '', + '์' => '', + '๎' => '', + 'ํ' => '', + 'ฺ' => '', + 'ๆ' => '2', + '๏' => 'o', + 'ฯ' => '-', + '๚' => '-', + '๛' => '-', + '๐' => '0', + '๑' => '1', + '๒' => '2', + '๓' => '3', + '๔' => '4', + '๕' => '5', + '๖' => '6', + '๗' => '7', + '๘' => '8', + '๙' => '9', + + // Korean + 'ㄱ' => 'k', 'ㅋ' => 'kh', + 'ㄲ' => 'kk', + 'ㄷ' => 't', + 'ㅌ' => 'th', + 'ㄸ' => 'tt', + 'ㅂ' => 'p', + 'ㅍ' => 'ph', + 'ㅃ' => 'pp', + 'ㅈ' => 'c', + 'ㅊ' => 'ch', + 'ㅉ' => 'cc', + 'ㅅ' => 's', + 'ㅆ' => 'ss', + 'ㅎ' => 'h', + 'ㅇ' => 'ng', + 'ㄴ' => 'n', + 'ㄹ' => 'l', + 'ㅁ' => 'm', + 'ㅏ' => 'a', + 'ㅓ' => 'e', + 'ㅗ' => 'o', + 'ㅜ' => 'wu', + 'ㅡ' => 'u', + 'ㅣ' => 'i', + 'ㅐ' => 'ay', + 
'ㅔ' => 'ey', + 'ㅚ' => 'oy', + 'ㅘ' => 'wa', + 'ㅝ' => 'we', + 'ㅟ' => 'wi', + 'ㅙ' => 'way', + 'ㅞ' => 'wey', + 'ㅢ' => 'uy', + 'ㅑ' => 'ya', + 'ㅕ' => 'ye', + 'ㅛ' => 'oy', + 'ㅠ' => 'yu', + 'ㅒ' => 'yay', + 'ㅖ' => 'yey', +]; diff --git a/platform/www/inc/Utf8/tables/specials.php b/platform/www/inc/Utf8/tables/specials.php new file mode 100644 index 0000000..f6243bc --- /dev/null +++ b/platform/www/inc/Utf8/tables/specials.php @@ -0,0 +1,615 @@ +<?php +/** + * UTF-8 array of common special characters + * + * This array should contain all special characters (not a letter or digit) + * defined in the various local charsets - it's not a complete list of non-alphanum + * characters in UTF-8. It's not perfect but should match most cases of special + * chars. + * + * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! + * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a + * + * @author Andreas Gohr <andi@splitbrain.org> + * @see \dokuwiki\Utf8\Clean::stripspecials() + */ +return [ + 0x1a, // + 0x1b, // + 0x1c, // + 0x1d, // + 0x1e, // + 0x1f, // + 0x20, // <space> + 0x21, // ! + 0x22, // " + 0x23, // # + 0x24, // $ + 0x25, // % + 0x26, // & + 0x27, // ' + 0x28, // ( + 0x29, // ) + 0x2b, // + + 0x2c, // , + 0x2f, // / + 0x3b, // ; + 0x3c, // < + 0x3d, // = + 0x3e, // > + 0x3f, // ? + 0x40, // @ + 0x5b, // [ + 0x5c, // \ + 0x5d, // ] + 0x5e, // ^ + 0x60, // ` + 0x7b, // { + 0x7c, // | + 0x7d, // } + 0x7e, // ~ + 0x7f, // + 0x80, // + 0x81, // + 0x82, // + 0x83, // + 0x84, // + 0x85, //
+ 0x86, // + 0x87, // + 0x88, // + 0x89, // + 0x8a, // + 0x8b, // + 0x8c, // + 0x8d, // + 0x8e, // + 0x8f, // + 0x90, // + 0x91, // + 0x92, // + 0x93, // + 0x94, // + 0x95, // + 0x96, // + 0x97, // + 0x98, // + 0x99, // + 0x9a, // + 0x9b, // + 0x9c, // + 0x9d, // + 0x9e, // + 0x9f, // + 0xa0, // + 0xa1, // ¡ + 0xa2, // ¢ + 0xa3, // £ + 0xa4, // ¤ + 0xa5, // ¥ + 0xa6, // ¦ + 0xa7, // § + 0xa8, // ¨ + 0xa9, // © + 0xaa, // ª + 0xab, // « + 0xac, // ¬ + 0xad, // + 0xae, // ® + 0xaf, // ¯ + 0xb0, // ° + 0xb1, // ± + 0xb2, // ² + 0xb3, // ³ + 0xb4, // ´ + 0xb5, // µ + 0xb6, // ¶ + 0xb7, // · + 0xb8, // ¸ + 0xb9, // ¹ + 0xba, // º + 0xbb, // » + 0xbc, // ¼ + 0xbd, // ½ + 0xbe, // ¾ + 0xbf, // ¿ + 0xd7, // × + 0xf7, // ÷ + 0x2c7, // ˇ + 0x2d8, // ˘ + 0x2d9, // ˙ + 0x2da, // ˚ + 0x2db, // ˛ + 0x2dc, // ˜ + 0x2dd, // ˝ + 0x300, // ̀ + 0x301, // ́ + 0x303, // ̃ + 0x309, // ̉ + 0x323, // ̣ + 0x384, // ΄ + 0x385, // ΅ + 0x387, // · + 0x5b0, // ְ + 0x5b1, // ֱ + 0x5b2, // ֲ + 0x5b3, // ֳ + 0x5b4, // ִ + 0x5b5, // ֵ + 0x5b6, // ֶ + 0x5b7, // ַ + 0x5b8, // ָ + 0x5b9, // ֹ + 0x5bb, // ֻ + 0x5bc, // ּ + 0x5bd, // ֽ + 0x5be, // ־ + 0x5bf, // ֿ + 0x5c0, // ׀ + 0x5c1, // ׁ + 0x5c2, // ׂ + 0x5c3, // ׃ + 0x5f3, // ׳ + 0x5f4, // ״ + 0x60c, // ، + 0x61b, // ؛ + 0x61f, // ؟ + 0x640, // ـ + 0x64b, // ً + 0x64c, // ٌ + 0x64d, // ٍ + 0x64e, // َ + 0x64f, // ُ + 0x650, // ِ + 0x651, // ّ + 0x652, // ْ + 0x66a, // ٪ + 0xe3f, // ฿ + 0x200c, // + 0x200d, // + 0x200e, // + 0x200f, // + 0x2013, // – + 0x2014, // — + 0x2015, // ― + 0x2017, // ‗ + 0x2018, // ‘ + 0x2019, // ’ + 0x201a, // ‚ + 0x201c, // “ + 0x201d, // ” + 0x201e, // „ + 0x2020, // † + 0x2021, // ‡ + 0x2022, // • + 0x2026, // … + 0x2030, // ‰ + 0x2032, // ′ + 0x2033, // ″ + 0x2039, // ‹ + 0x203a, // › + 0x2044, // ⁄ + 0x20a7, // ₧ + 0x20aa, // ₪ + 0x20ab, // ₫ + 0x20ac, // € + 0x2116, // № + 0x2118, // ℘ + 0x2122, // ™ + 0x2126, // Ω + 0x2135, // ℵ + 0x2190, // ← + 0x2191, // ↑ + 0x2192, // → + 0x2193, // ↓ + 0x2194, // ↔ + 0x2195, // 
↕ + 0x21b5, // ↵ + 0x21d0, // ⇐ + 0x21d1, // ⇑ + 0x21d2, // ⇒ + 0x21d3, // ⇓ + 0x21d4, // ⇔ + 0x2200, // ∀ + 0x2202, // ∂ + 0x2203, // ∃ + 0x2205, // ∅ + 0x2206, // ∆ + 0x2207, // ∇ + 0x2208, // ∈ + 0x2209, // ∉ + 0x220b, // ∋ + 0x220f, // ∏ + 0x2211, // ∑ + 0x2212, // − + 0x2215, // ∕ + 0x2217, // ∗ + 0x2219, // ∙ + 0x221a, // √ + 0x221d, // ∝ + 0x221e, // ∞ + 0x2220, // ∠ + 0x2227, // ∧ + 0x2228, // ∨ + 0x2229, // ∩ + 0x222a, // ∪ + 0x222b, // ∫ + 0x2234, // ∴ + 0x223c, // ∼ + 0x2245, // ≅ + 0x2248, // ≈ + 0x2260, // ≠ + 0x2261, // ≡ + 0x2264, // ≤ + 0x2265, // ≥ + 0x2282, // ⊂ + 0x2283, // ⊃ + 0x2284, // ⊄ + 0x2286, // ⊆ + 0x2287, // ⊇ + 0x2295, // ⊕ + 0x2297, // ⊗ + 0x22a5, // ⊥ + 0x22c5, // ⋅ + 0x2310, // ⌐ + 0x2320, // ⌠ + 0x2321, // ⌡ + 0x2329, // 〈 + 0x232a, // 〉 + 0x2469, // ⑩ + 0x2500, // ─ + 0x2502, // │ + 0x250c, // ┌ + 0x2510, // ┐ + 0x2514, // └ + 0x2518, // ┘ + 0x251c, // ├ + 0x2524, // ┤ + 0x252c, // ┬ + 0x2534, // ┴ + 0x253c, // ┼ + 0x2550, // ═ + 0x2551, // ║ + 0x2552, // ╒ + 0x2553, // ╓ + 0x2554, // ╔ + 0x2555, // ╕ + 0x2556, // ╖ + 0x2557, // ╗ + 0x2558, // ╘ + 0x2559, // ╙ + 0x255a, // ╚ + 0x255b, // ╛ + 0x255c, // ╜ + 0x255d, // ╝ + 0x255e, // ╞ + 0x255f, // ╟ + 0x2560, // ╠ + 0x2561, // ╡ + 0x2562, // ╢ + 0x2563, // ╣ + 0x2564, // ╤ + 0x2565, // ╥ + 0x2566, // ╦ + 0x2567, // ╧ + 0x2568, // ╨ + 0x2569, // ╩ + 0x256a, // ╪ + 0x256b, // ╫ + 0x256c, // ╬ + 0x2580, // ▀ + 0x2584, // ▄ + 0x2588, // █ + 0x258c, // ▌ + 0x2590, // ▐ + 0x2591, // ░ + 0x2592, // ▒ + 0x2593, // ▓ + 0x25a0, // ■ + 0x25b2, // ▲ + 0x25bc, // ▼ + 0x25c6, // ◆ + 0x25ca, // ◊ + 0x25cf, // ● + 0x25d7, // ◗ + 0x2605, // ★ + 0x260e, // ☎ + 0x261b, // ☛ + 0x261e, // ☞ + 0x2660, // ♠ + 0x2663, // ♣ + 0x2665, // ♥ + 0x2666, // ♦ + 0x2701, // ✁ + 0x2702, // ✂ + 0x2703, // ✃ + 0x2704, // ✄ + 0x2706, // ✆ + 0x2707, // ✇ + 0x2708, // ✈ + 0x2709, // ✉ + 0x270c, // ✌ + 0x270d, // ✍ + 0x270e, // ✎ + 0x270f, // ✏ + 0x2710, // ✐ + 0x2711, // ✑ + 0x2712, // ✒ + 0x2713, // ✓ + 0x2714, // ✔ + 
0x2715, // ✕ + 0x2716, // ✖ + 0x2717, // ✗ + 0x2718, // ✘ + 0x2719, // ✙ + 0x271a, // ✚ + 0x271b, // ✛ + 0x271c, // ✜ + 0x271d, // ✝ + 0x271e, // ✞ + 0x271f, // ✟ + 0x2720, // ✠ + 0x2721, // ✡ + 0x2722, // ✢ + 0x2723, // ✣ + 0x2724, // ✤ + 0x2725, // ✥ + 0x2726, // ✦ + 0x2727, // ✧ + 0x2729, // ✩ + 0x272a, // ✪ + 0x272b, // ✫ + 0x272c, // ✬ + 0x272d, // ✭ + 0x272e, // ✮ + 0x272f, // ✯ + 0x2730, // ✰ + 0x2731, // ✱ + 0x2732, // ✲ + 0x2733, // ✳ + 0x2734, // ✴ + 0x2735, // ✵ + 0x2736, // ✶ + 0x2737, // ✷ + 0x2738, // ✸ + 0x2739, // ✹ + 0x273a, // ✺ + 0x273b, // ✻ + 0x273c, // ✼ + 0x273d, // ✽ + 0x273e, // ✾ + 0x273f, // ✿ + 0x2740, // ❀ + 0x2741, // ❁ + 0x2742, // ❂ + 0x2743, // ❃ + 0x2744, // ❄ + 0x2745, // ❅ + 0x2746, // ❆ + 0x2747, // ❇ + 0x2748, // ❈ + 0x2749, // ❉ + 0x274a, // ❊ + 0x274b, // ❋ + 0x274d, // ❍ + 0x274f, // ❏ + 0x2750, // ❐ + 0x2751, // ❑ + 0x2752, // ❒ + 0x2756, // ❖ + 0x2758, // ❘ + 0x2759, // ❙ + 0x275a, // ❚ + 0x275b, // ❛ + 0x275c, // ❜ + 0x275d, // ❝ + 0x275e, // ❞ + 0x2761, // ❡ + 0x2762, // ❢ + 0x2763, // ❣ + 0x2764, // ❤ + 0x2765, // ❥ + 0x2766, // ❦ + 0x2767, // ❧ + 0x277f, // ❿ + 0x2789, // ➉ + 0x2793, // ➓ + 0x2794, // ➔ + 0x2798, // ➘ + 0x2799, // ➙ + 0x279a, // ➚ + 0x279b, // ➛ + 0x279c, // ➜ + 0x279d, // ➝ + 0x279e, // ➞ + 0x279f, // ➟ + 0x27a0, // ➠ + 0x27a1, // ➡ + 0x27a2, // ➢ + 0x27a3, // ➣ + 0x27a4, // ➤ + 0x27a5, // ➥ + 0x27a6, // ➦ + 0x27a7, // ➧ + 0x27a8, // ➨ + 0x27a9, // ➩ + 0x27aa, // ➪ + 0x27ab, // ➫ + 0x27ac, // ➬ + 0x27ad, // ➭ + 0x27ae, // ➮ + 0x27af, // ➯ + 0x27b1, // ➱ + 0x27b2, // ➲ + 0x27b3, // ➳ + 0x27b4, // ➴ + 0x27b5, // ➵ + 0x27b6, // ➶ + 0x27b7, // ➷ + 0x27b8, // ➸ + 0x27b9, // ➹ + 0x27ba, // ➺ + 0x27bb, // ➻ + 0x27bc, // ➼ + 0x27bd, // ➽ + 0x27be, // ➾ + 0x3000, // + 0x3001, // 、 + 0x3002, // 。 + 0x3003, // 〃 + 0x3008, // 〈 + 0x3009, // 〉 + 0x300a, // 《 + 0x300b, // 》 + 0x300c, // 「 + 0x300d, // 」 + 0x300e, // 『 + 0x300f, // 』 + 0x3010, // 【 + 0x3011, // 】 + 0x3012, // 〒 + 0x3014, // 〔 + 0x3015, // 〕 + 
0x3016, // 〖 + 0x3017, // 〗 + 0x3018, // 〘 + 0x3019, // 〙 + 0x301a, // 〚 + 0x301b, // 〛 + 0x3036, // 〶 + 0xf6d9, // + 0xf6da, // + 0xf6db, // + 0xf8d7, // + 0xf8d8, // + 0xf8d9, // + 0xf8da, // + 0xf8db, // + 0xf8dc, // + 0xf8dd, // + 0xf8de, // + 0xf8df, // + 0xf8e0, // + 0xf8e1, // + 0xf8e2, // + 0xf8e3, // + 0xf8e4, // + 0xf8e5, // + 0xf8e6, // + 0xf8e7, // + 0xf8e8, // + 0xf8e9, // + 0xf8ea, // + 0xf8eb, // + 0xf8ec, // + 0xf8ed, // + 0xf8ee, // + 0xf8ef, // + 0xf8f0, // + 0xf8f1, // + 0xf8f2, // + 0xf8f3, // + 0xf8f4, // + 0xf8f5, // + 0xf8f6, // + 0xf8f7, // + 0xf8f8, // + 0xf8f9, // + 0xf8fa, // + 0xf8fb, // + 0xf8fc, // + 0xf8fd, // + 0xf8fe, // + 0xfe7c, // ﹼ + 0xfe7d, // ﹽ + 0xff01, // ! + 0xff02, // " + 0xff03, // # + 0xff04, // $ + 0xff05, // % + 0xff06, // & + 0xff07, // ' + 0xff08, // ( + 0xff09, // ) + 0xff09, // ) + 0xff0a, // * + 0xff0b, // + + 0xff0c, // , + 0xff0d, // - + 0xff0e, // . + 0xff0f, // / + 0xff1a, // : + 0xff1b, // ; + 0xff1c, // < + 0xff1d, // = + 0xff1e, // > + 0xff1f, // ? 
+ 0xff20, // @ + 0xff3b, // [ + 0xff3c, // \ + 0xff3d, // ] + 0xff3e, // ^ + 0xff40, // ` + 0xff5b, // { + 0xff5c, // | + 0xff5d, // } + 0xff5e, // ~ + 0xff5f, // ⦅ + 0xff60, // ⦆ + 0xff61, // 。 + 0xff62, // 「 + 0xff63, // 」 + 0xff64, // 、 + 0xff65, // ・ + 0xffe0, // ¢ + 0xffe1, // £ + 0xffe2, // ¬ + 0xffe3, //  ̄ + 0xffe4, // ¦ + 0xffe5, // ¥ + 0xffe6, // ₩ + 0xffe8, // │ + 0xffe9, // ← + 0xffea, // ↑ + 0xffeb, // → + 0xffec, // ↓ + 0xffed, // ■ + 0xffee, // ○ + 0x1d6fc, // 𝛼 + 0x1d6fd, // 𝛽 + 0x1d6fe, // 𝛾 + 0x1d6ff, // 𝛿 + 0x1d700, // 𝜀 + 0x1d701, // 𝜁 + 0x1d702, // 𝜂 + 0x1d703, // 𝜃 + 0x1d704, // 𝜄 + 0x1d705, // 𝜅 + 0x1d706, // 𝜆 + 0x1d707, // 𝜇 + 0x1d708, // 𝜈 + 0x1d709, // 𝜉 + 0x1d70a, // 𝜊 + 0x1d70b, // 𝜋 + 0x1d70c, // 𝜌 + 0x1d70d, // 𝜍 + 0x1d70e, // 𝜎 + 0x1d70f, // 𝜏 + 0x1d710, // 𝜐 + 0x1d711, // 𝜑 + 0x1d712, // 𝜒 + 0x1d713, // 𝜓 + 0x1d714, // 𝜔 + 0x1d715, // 𝜕 + 0x1d716, // 𝜖 + 0x1d717, // 𝜗 + 0x1d718, // 𝜘 + 0x1d719, // 𝜙 + 0x1d71a, // 𝜚 + 0x1d71b, // 𝜛 + 0xc2a0, // 슠 + 0xe28087, // + 0xe280af, // + 0xe281a0, // + 0xefbbbf, // +]; diff --git a/platform/www/inc/Utf8/tables/upperaccents.php b/platform/www/inc/Utf8/tables/upperaccents.php new file mode 100644 index 0000000..e6e48de --- /dev/null +++ b/platform/www/inc/Utf8/tables/upperaccents.php @@ -0,0 +1,114 @@ +<?php +/** + * UTF-8 lookup table for upper case accented letters + * + * This lookuptable defines replacements for accented characters from the ASCII-7 + * range. This are upper case letters only. 
+ * + * @author Andreas Gohr <andi@splitbrain.org> + * @see \dokuwiki\Utf8\Clean::deaccent() + */ +return [ + 'Á' => 'A', + 'À' => 'A', + 'Ă' => 'A', + 'Â' => 'A', + 'Å' => 'A', + 'Ä' => 'Ae', + 'Ã' => 'A', + 'Ą' => 'A', + 'Ā' => 'A', + 'Æ' => 'Ae', + 'Ḃ' => 'B', + 'Ć' => 'C', + 'Ĉ' => 'C', + 'Č' => 'C', + 'Ċ' => 'C', + 'Ç' => 'C', + 'Ď' => 'D', + 'Ḋ' => 'D', + 'Đ' => 'D', + 'Ð' => 'Dh', + 'É' => 'E', + 'È' => 'E', + 'Ĕ' => 'E', + 'Ê' => 'E', + 'Ě' => 'E', + 'Ë' => 'E', + 'Ė' => 'E', + 'Ę' => 'E', + 'Ē' => 'E', + 'Ḟ' => 'F', + 'Ƒ' => 'F', + 'Ğ' => 'G', + 'Ĝ' => 'G', + 'Ġ' => 'G', + 'Ģ' => 'G', + 'Ĥ' => 'H', + 'Ħ' => 'H', + 'Í' => 'I', + 'Ì' => 'I', + 'Î' => 'I', + 'Ï' => 'I', + 'Ĩ' => 'I', + 'Į' => 'I', + 'Ī' => 'I', + 'Ĵ' => 'J', + 'Ķ' => 'K', + 'Ĺ' => 'L', + 'Ľ' => 'L', + 'Ļ' => 'L', + 'Ł' => 'L', + 'Ṁ' => 'M', + 'Ń' => 'N', + 'Ň' => 'N', + 'Ñ' => 'N', + 'Ņ' => 'N', + 'Ó' => 'O', + 'Ò' => 'O', + 'Ô' => 'O', + 'Ö' => 'Oe', + 'Ő' => 'O', + 'Õ' => 'O', + 'Ø' => 'O', + 'Ō' => 'O', + 'Ơ' => 'O', + 'Ṗ' => 'P', + 'Ŕ' => 'R', + 'Ř' => 'R', + 'Ŗ' => 'R', + 'Ś' => 'S', + 'Ŝ' => 'S', + 'Š' => 'S', + 'Ṡ' => 'S', + 'Ş' => 'S', + 'Ș' => 'S', + 'Ť' => 'T', + 'Ṫ' => 'T', + 'Ţ' => 'T', + 'Ț' => 'T', + 'Ŧ' => 'T', + 'Ú' => 'U', + 'Ù' => 'U', + 'Ŭ' => 'U', + 'Û' => 'U', + 'Ů' => 'U', + 'Ü' => 'Ue', + 'Ű' => 'U', + 'Ũ' => 'U', + 'Ų' => 'U', + 'Ū' => 'U', + 'Ư' => 'U', + 'Ẃ' => 'W', + 'Ẁ' => 'W', + 'Ŵ' => 'W', + 'Ẅ' => 'W', + 'Ý' => 'Y', + 'Ỳ' => 'Y', + 'Ŷ' => 'Y', + 'Ÿ' => 'Y', + 'Ź' => 'Z', + 'Ž' => 'Z', + 'Ż' => 'Z', + 'Þ' => 'Th', +]; |