summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php138
1 files changed, 138 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php b/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php
new file mode 100644
index 00000000..5bf10d51
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/MediaWiki/Collator.php
@@ -0,0 +1,138 @@
+<?php
+
+namespace SMW\MediaWiki;
+
+use Collation;
+
+/**
+ * @license GNU GPL v2+
+ * @since 3.0
+ *
+ * @author mwjames
+ */
+class Collator {
+
+ /**
+ * @var Collator
+ */
+ private static $instance = [];
+
+ /**
+ * @var Collation
+ */
+ private $collation;
+
+ /**
+ * @var string
+ */
+ private $collationName;
+
+ /**
+ * @private
+ *
+ * @since 3.0
+ *
+ * @param Collation $collation
+ * @param string $collationName
+ */
+ public function __construct( Collation $collation, $collationName = '' ) {
+ $this->collation = $collation;
+ $this->collationName = $collationName;
+ }
+
+ /**
+ * @since 3.0
+ *
+ * @param srtring $collationName
+ *
+ * @return Collator
+ */
+ public static function singleton( $collationName = '' ) {
+
+ $collationName = $collationName === '' ? $GLOBALS['smwgEntityCollation'] : $collationName;
+
+ if ( !isset( self::$instance[$collationName] ) ) {
+ self::$instance[$collationName] = new self( Collation::factory( $collationName ), $collationName );
+ }
+
+ return self::$instance[$collationName];
+ }
+
+ /**
+ * For any uca-* generated sortkey armor any invalid or unrecognized UTF-8
+ * characters to prevent an invalid XML/UTF output.
+ *
+ * Characters that cannot be expressed are replaced by ? which is surely
+ * inaccurate in comparison to the original uca-* sortkey but it allows to
+ * replicate a near surrogate string to a back-end that requires XML
+ * compliance (triple store).
+ *
+ * @since 3.0
+ *
+ * @param string $text
+ *
+ * @return string
+ */
+ public function armor( $text, $source = '' ) {
+
+ if ( strpos( $this->collationName, 'uca' ) === false ) {
+ return $text;
+ }
+
+ // $text = mb_convert_encoding( $text, 'UTF-8' );
+
+ // https://magp.ie/2011/01/06/remove-non-utf8-characters-from-string-with-php/
+ // Remove all none utf-8 symbols
+ $text = str_replace( '�', '', htmlspecialchars( $text, ENT_SUBSTITUTE, 'UTF-8' ) );
+
+ // remove non-breaking spaces and other non-standard spaces
+ $text = preg_replace( '~\s+~u', '?', $text );
+
+ // replace controls symbols with "?"
+ $text = preg_replace( '~\p{C}+~u', '?', $text );
+
+ return $text;
+ }
+
+ /**
+ * @since 3.0
+ *
+ * @param string $text
+ *
+ * @return string
+ */
+ public function getSortKey( $text ) {
+ return $this->collation->getSortKey( $text );
+ }
+
+ /**
+ * @since 3.0
+ *
+ * @param string $text
+ *
+ * @return string
+ */
+ public function getFirstLetter( $text ) {
+
+ // Add check otherwise the Collation instance returns with a
+ // "Uninitialized string offset: 0"
+ if ( $text === '' ) {
+ return '';
+ }
+
+ return $this->collation->getFirstLetter( $text );
+ }
+
+ /**
+ * @since 3.0
+ *
+ * @param string $old
+ * @param string $new
+ *
+ * @return boolean
+ */
+ public function isIdentical( $old, $new ) {
+ return $this->collation->getSortKey( $old ) === $this->collation->getSortKey( $new );
+ }
+
+}