summary | refs | log | tree | commit | diff
path: root/www/wiki/tests/phpunit/includes/collation
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/tests/phpunit/includes/collation')
-rw-r--r-- www/wiki/tests/phpunit/includes/collation/CollationFaTest.php 55
-rw-r--r-- www/wiki/tests/phpunit/includes/collation/CollationTest.php 118
-rw-r--r-- www/wiki/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php 68
3 files changed, 241 insertions, 0 deletions
diff --git a/www/wiki/tests/phpunit/includes/collation/CollationFaTest.php b/www/wiki/tests/phpunit/includes/collation/CollationFaTest.php
new file mode 100644
index 00000000..f7455419
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/collation/CollationFaTest.php
@@ -0,0 +1,55 @@
+<?php
+
+/**
+ * @covers CollationFa
+ */
+class CollationFaTest extends MediaWikiTestCase {
+
+	/*
+	 * Only first-letter extraction is covered here. The ordering produced by
+	 * CollationFa is a weird hack designed to work only with a very specific
+	 * version of libicu, and as such can't really be unit tested against a
+	 * random version of libicu.
+	 */
+
+	/**
+	 * Run the base-class setup, then require the 'intl' PHP extension.
+	 */
+	public function setUp() {
+		parent::setUp();
+		$this->checkPHPExtension( 'intl' );
+	}
+
+	/**
+	 * CollationFa::getFirstLetter() must report $letter for $str.
+	 *
+	 * @param string $letter Expected first letter
+	 * @param string $str String passed to getFirstLetter(); also used as the
+	 *  assertion failure message
+	 *
+	 * @dataProvider provideGetFirstLetter
+	 */
+	public function testGetFirstLetter( $letter, $str ) {
+		$collation = new CollationFa();
+		$actual = $collation->getFirstLetter( $str );
+
+		$this->assertEquals( $letter, $actual, $str );
+	}
+
+	/**
+	 * Data sets for testGetFirstLetter().
+	 *
+	 * @return array[] List of [ expected first letter, input string ] pairs
+	 */
+	public function provideGetFirstLetter() {
+		// Each literal is kept on its own line so that the right-to-left text
+		// of one string does not visually run into the neighbouring one.
+		$literalCases = [
+			[
+				'۷',
+				'۷'
+			],
+			[
+				'ا',
+				'ا'
+			],
+			[
+				'ا',
+				'ایران'
+			],
+			[
+				'ب',
+				'برلین'
+			],
+			[
+				'و',
+				'واو'
+			],
+		];
+
+		// Byte-escaped first letters (U+0627, U+0648, U+0672 and U+0673),
+		// each followed by Latin text.
+		$escapedCases = [
+			[ "\xd8\xa7", "\xd8\xa7Foo" ],
+			[ "\xd9\x88", "\xd9\x88Foo" ],
+			[ "\xd9\xb2", "\xd9\xb2Foo" ],
+			[ "\xd9\xb3", "\xd9\xb3Foo" ],
+		];
+
+		return array_merge( $literalCases, $escapedCases );
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/collation/CollationTest.php b/www/wiki/tests/phpunit/includes/collation/CollationTest.php
new file mode 100644
index 00000000..b92e651e
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/collation/CollationTest.php
@@ -0,0 +1,118 @@
+<?php
+
+/**
+ * Class CollationTest
+ * @covers Collation
+ * @covers IcuCollation
+ * @covers IdentityCollation
+ * @covers UppercaseCollation
+ */
+class CollationTest extends MediaWikiLangTestCase {
+	/**
+	 * Run the base-class setup, then require the 'intl' PHP extension,
+	 * whose Collator class the prefix tests use directly.
+	 */
+	protected function setUp() {
+		parent::setUp();
+		$this->checkPHPExtension( 'intl' );
+	}
+
+	/**
+	 * Build the primary-strength sort keys compared by the prefix tests.
+	 *
+	 * @param string $lang Language code for collator
+	 * @param string $base
+	 * @param string $extended
+	 *
+	 * @return string[] [ sort key of $base with trailing NUL bytes removed,
+	 *  sort key of $extended ]
+	 */
+	private function getPrimarySortKeys( $lang, $base, $extended ) {
+		$collator = Collator::create( $lang );
+		// Collator::create() returns null on failure; fail the test cleanly
+		// instead of hitting a fatal error on the method call below.
+		$this->assertInstanceOf( 'Collator', $collator, "Could not create a collator for $lang" );
+		$collator->setStrength( Collator::PRIMARY );
+
+		// Remove sortkey terminator
+		$baseBin = rtrim( $collator->getSortKey( $base ), "\0" );
+		$extendedBin = $collator->getSortKey( $extended );
+
+		return [ $baseBin, $extendedBin ];
+	}
+
+	/**
+	 * Test to make sure, that if you
+	 * have "X" and "XY", the binary
+	 * sortkey also has "X" being a
+	 * prefix of "XY". Our collation
+	 * code makes this assumption.
+	 *
+	 * @param string $lang Language code for collator
+	 * @param string $base
+	 * @param string $extended String containing base as a prefix.
+	 *
+	 * @dataProvider prefixDataProvider
+	 */
+	public function testIsPrefix( $lang, $base, $extended ) {
+		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
+
+		$this->assertStringStartsWith( $baseBin, $extendedBin, "$base is not a prefix of $extended" );
+	}
+
+	/**
+	 * Data sets for testIsPrefix().
+	 *
+	 * @return array[] List of [ language code, base string, extended string ]
+	 */
+	public static function prefixDataProvider() {
+		return [
+			[ 'en', 'A', 'AA' ],
+			[ 'en', 'A', 'AAA' ],
+			[ 'en', 'Д', 'ДЂ' ],
+			[ 'en', 'Д', 'ДA' ],
+			// 'Ʒ' should expand to 'Z ' (note space).
+			[ 'fi', 'Z', 'Ʒ' ],
+			// 'Þ' should expand to 'th'
+			[ 'sv', 't', 'Þ' ],
+			// Javanese is a limited use alphabet, so should have 3 bytes
+			// per character, so do some tests with it.
+			[ 'en', 'ꦲ', 'ꦲꦤ' ],
+			[ 'en', 'ꦲ', 'ꦲД' ],
+			[ 'en', 'A', 'Aꦲ' ],
+		];
+	}
+
+	/**
+	 * Opposite of testIsPrefix: the sort key of $base must not be
+	 * a prefix of the sort key of $extended.
+	 *
+	 * @param string $lang Language code for collator
+	 * @param string $base
+	 * @param string $extended
+	 *
+	 * @dataProvider notPrefixDataProvider
+	 */
+	public function testNotIsPrefix( $lang, $base, $extended ) {
+		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
+
+		$this->assertStringStartsNotWith( $baseBin, $extendedBin, "$base is a prefix of $extended" );
+	}
+
+	/**
+	 * Data sets for testNotIsPrefix().
+	 *
+	 * @return array[] List of [ language code, base string, extended string ]
+	 */
+	public static function notPrefixDataProvider() {
+		return [
+			[ 'en', 'A', 'B' ],
+			[ 'en', 'AC', 'ABC' ],
+			[ 'en', 'Z', 'Ʒ' ],
+			[ 'en', 'A', 'ꦲ' ],
+		];
+	}
+
+	/**
+	 * Test correct first letter is fetched.
+	 *
+	 * @param string $collation Collation name (aka uca-en)
+	 * @param string $string String to get first letter of
+	 * @param string $firstLetter Expected first letter.
+	 *
+	 * @dataProvider firstLetterProvider
+	 */
+	public function testGetFirstLetter( $collation, $string, $firstLetter ) {
+		$col = Collation::factory( $collation );
+		$this->assertEquals( $firstLetter, $col->getFirstLetter( $string ) );
+	}
+
+	/**
+	 * Data sets for testGetFirstLetter().
+	 *
+	 * Declared public static like the other providers of this class.
+	 *
+	 * @return array[] List of [ collation name, input string, expected first letter ]
+	 */
+	public static function firstLetterProvider() {
+		return [
+			[ 'uppercase', 'Abc', 'A' ],
+			[ 'uppercase', 'abc', 'A' ],
+			[ 'identity', 'abc', 'a' ],
+			[ 'uca-en', 'abc', 'A' ],
+			[ 'uca-en', ' ', ' ' ],
+			[ 'uca-en', 'Êveryone', 'E' ],
+			[ 'uca-vi', 'Êveryone', 'Ê' ],
+			// Make sure thorn is not a first letter.
+			[ 'uca-sv', 'The', 'T' ],
+			[ 'uca-sv', 'Å', 'Å' ],
+			[ 'uca-hu', 'dzsdo', 'Dzs' ],
+			[ 'uca-hu', 'dzdso', 'Dz' ],
+			[ 'uca-hu', 'CSD', 'Cs' ],
+			[ 'uca-root', 'CSD', 'C' ],
+			[ 'uca-fi', 'Ǥ', 'G' ],
+			[ 'uca-fi', 'Ŧ', 'T' ],
+			[ 'uca-fi', 'Ʒ', 'Z' ],
+			[ 'uca-fi', 'Ŋ', 'N' ],
+			[ 'uppercase-ba', 'в', 'В' ],
+		];
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php b/www/wiki/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php
new file mode 100644
index 00000000..f9e0bc9b
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/collation/CustomUppercaseCollationTest.php
@@ -0,0 +1,68 @@
+<?php
+
+/**
+ * @covers CustomUppercaseCollation
+ */
+class CustomUppercaseCollationTest extends MediaWikiTestCase {
+
+	/**
+	 * @var CustomUppercaseCollation Collation under test, rebuilt by setUp()
+	 *  with the custom alphabet D, C, Cs, B
+	 */
+	private $collation;
+
+	/**
+	 * Create the collation under test: a CustomUppercaseCollation for the
+	 * 'en' language whose alphabet is D, C, Cs, B (in that order).
+	 */
+	public function setUp() {
+		// Parent setup goes first, as in CollationTest and CollationFaTest.
+		// NOTE(review): presumably it prepares the environment that
+		// Language::factory() relies on -- confirm.
+		parent::setUp();
+
+		$this->collation = new CustomUppercaseCollation(
+			[ 'D', 'C', 'Cs', 'B' ],
+			Language::factory( 'en' )
+		);
+	}
+
+	/**
+	 * The sort key of $first must compare strictly lower (bytewise) than
+	 * the sort key of $second.
+	 *
+	 * @param string $first String expected to sort first
+	 * @param string $second String expected to sort after $first
+	 * @param string $msg Assertion failure message
+	 *
+	 * @dataProvider providerOrder
+	 */
+	public function testOrder( $first, $second, $msg ) {
+		$sortkey1 = $this->collation->getSortKey( $first );
+		$sortkey2 = $this->collation->getSortKey( $second );
+
+		// assertLessThan() reports the actual strcmp() result on failure,
+		// which a bare assertTrue() on the comparison would hide.
+		$this->assertLessThan( 0, strcmp( $sortkey1, $sortkey2 ), $msg );
+	}
+
+	/**
+	 * Data sets for testOrder().
+	 *
+	 * @return array[] List of [ string sorting first, string sorting second, message ]
+	 */
+	public function providerOrder() {
+		return [
+			[ 'X', 'Z', 'Maintain order of unrearranged' ],
+			[ 'D', 'C', 'Actually resorts' ],
+			[ 'D', 'B', 'resort test 2' ],
+			[ 'Adobe', 'Abode', 'not first letter' ],
+			[ '💩 ', 'C', 'Test relocated to end' ],
+			[ 'c', 'b', 'lowercase' ],
+			[ 'x', 'z', 'lowercase original' ],
+			[ 'Cz', 'Cs', 'digraphs' ],
+			[ 'C50D', 'C100', 'Numbers' ],
+		];
+	}
+
+	/**
+	 * getFirstLetter() must return exactly $first for $string.
+	 *
+	 * @param string $string String to get the first letter of
+	 * @param string $first Expected first letter
+	 *
+	 * @dataProvider provideGetFirstLetter
+	 */
+	public function testGetFirstLetter( $string, $first ) {
+		// Expected value goes first so failure output labels both sides correctly.
+		$this->assertSame( $first, $this->collation->getFirstLetter( $string ) );
+	}
+
+	/**
+	 * Data sets for testGetFirstLetter().
+	 *
+	 * @return array[] List of [ input string, expected first letter ]
+	 */
+	public function provideGetFirstLetter() {
+		return [
+			[ 'Do', 'D' ],
+			[ 'do', 'D' ],
+			[ 'Ao', 'A' ],
+			[ 'afdsa', 'A' ],
+			// Strings starting with U+F3000..U+F3003 are expected to yield the
+			// 1st..4th letter of the custom alphabet, while U+F3004 is expected
+			// back unchanged. NOTE(review): presumably these are the private-use
+			// code points the collation substitutes internally -- confirm.
+			[ "\xF3\xB3\x80\x80Foo", 'D' ],
+			[ "\xF3\xB3\x80\x81Foo", 'C' ],
+			[ "\xF3\xB3\x80\x82Foo", 'Cs' ],
+			[ "\xF3\xB3\x80\x83Foo", 'B' ],
+			[ "\xF3\xB3\x80\x84Foo", "\xF3\xB3\x80\x84" ],
+			[ 'C', 'C' ],
+			[ 'Cz', 'C' ],
+			[ 'Cs', 'Cs' ],
+			[ 'CS', 'Cs' ],
+			[ 'cs', 'Cs' ],
+		];
+	}
+}