summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php90
1 files changed, 90 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php b/www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php
new file mode 100644
index 00000000..e7d07c48
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/Utils/CharExaminer.php
@@ -0,0 +1,90 @@
+<?php
+
+namespace SMW\Utils;
+
+/**
+ * @license GNU GPL v2+
+ * @since 3.0
+ *
+ * @author mwjames
+ */
+class CharExaminer {
+
+	const CYRILLIC = 'CYRILLIC';
+	const LATIN = 'LATIN';
+	const HIRAGANA_KATAKANA = 'HIRAGANA_KATAKANA';
+	const HANGUL = 'HANGUL';
+	const CJK_UNIFIED = 'CJK_UNIFIED';
+	const HAN = 'HAN';
+
+	/**
+	 * Checks whether the text contains at least one character from the Han,
+	 * Hiragana/Katakana, Hangul or CJK Unified Ideographs groups handled by
+	 * `contains`.
+	 *
+	 * @since 3.0
+	 *
+	 * @param string $text
+	 *
+	 * @return boolean
+	 */
+	public static function isCJK( $text ) {
+
+		// A single matching group is enough to classify the text as CJK
+		$types = [ self::HAN, self::HIRAGANA_KATAKANA, self::HANGUL, self::CJK_UNIFIED ];
+
+		foreach ( $types as $type ) {
+			if ( self::contains( $type, $text ) ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Checks whether the text contains at least one character of the script
+	 * or code point range selected by `$type`.
+	 *
+	 * @see http://jrgraphix.net/research/unicode_blocks.php
+	 * @see https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
+	 * @since 3.0
+	 *
+	 * @param string $type one of the class constants
+	 * @param string $text expected to be UTF-8, every pattern uses the `u` modifier
+	 *
+	 * @return boolean false for an unknown type, when no character matches,
+	 *  or when preg_match itself reports a failure
+	 */
+	public static function contains( $type, $text ) {
+
+		$patterns = [
+			self::CYRILLIC => '/\p{Cyrillic}/u',
+			self::LATIN => '/\p{Latin}/u',
+			self::HAN => '/\p{Han}/u',
+
+			// Hiragana (U+3040-U+309F) or Katakana (U+30A0-U+30FF)
+			self::HIRAGANA_KATAKANA => '/[\x{3040}-\x{309F}\x{30A0}-\x{30FF}]/u',
+
+			// Hangul Compatibility Jamo (U+3130-U+318F) or Hangul Syllables
+			// (U+AC00-U+D7AF)
+			self::HANGUL => '/[\x{3130}-\x{318F}\x{AC00}-\x{D7AF}]/u',
+
+			// Chinese, Japanese and Korean (CJK) scripts share common
+			// characters known as CJK characters; the block spans
+			// U+4E00-U+9FFF, the earlier upper bound U+9FA5 missed the
+			// ideographs appended by later Unicode versions
+			self::CJK_UNIFIED => '/[\x{4E00}-\x{9FFF}]/u',
+		];
+
+		// Non-string or unrecognised identifiers match nothing; the string
+		// check also keeps invalid offset types away from the lookup
+		if ( !is_string( $type ) || !isset( $patterns[$type] ) ) {
+			return false;
+		}
+
+		// preg_match yields 1 on a match, 0 on none and false on failure
+		return preg_match( $patterns[$type], $text ) > 0;
+	}
+
+}