diff options
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/data/elastic/smw-data-icu.json')
-rw-r--r-- | www/wiki/extensions/SemanticMediaWiki/data/elastic/smw-data-icu.json | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/data/elastic/smw-data-icu.json b/www/wiki/extensions/SemanticMediaWiki/data/elastic/smw-data-icu.json new file mode 100644 index 00000000..a2bb335d --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/data/elastic/smw-data-icu.json @@ -0,0 +1,283 @@ +{ + "settings": { + "number_of_shards": 2, + "number_of_replicas": 2, + "refresh_interval" : "1s", + "index.mapping.total_fields.limit": "9000", + "index.max_result_window": "50000", + "analysis": { + "filter": { + "uri_stopwords_filter": { + "type": "stop", + "stopwords": [ "http", "https", "ftp", "www" ] + } + }, + "char_filter": { + "nfd_normalizer": { + "type": "icu_normalizer", + "name": "nfc", + "mode": "decompose" + }, + "nfkc_normalizer": { + "type": "icu_normalizer", + "name": "nfkc", + "mode": "decompose" + }, + "wiki_char_filter": { + "type": "mapping", + "mappings": [ + "[ => ", + "] => ", + "/ => " + ] + } + }, + "analyzer": { + "uri_lowercase_with_stopwords": { + "type": "custom", + "tokenizer": "lowercase", + "filter": [ "uri_stopwords_filter" ] + }, + "nfkc_cf_normalized": { + "tokenizer": "icu_tokenizer", + "char_filter": [ + "icu_normalizer" + ] + }, + "nfkc_cf_normalized_lowercase": { + "tokenizer": "icu_tokenizer", + "char_filter": [ + "icu_normalizer" + ], + "filter": [ "lowercase" ] + }, + "nfd_normalized": { + "tokenizer": "icu_tokenizer", + "char_filter": [ + "nfd_normalizer" + ] + }, + "nfkc_normalized": { + "tokenizer": "icu_tokenizer", + "char_filter": [ + "nfkc_normalizer" + ] + } + } + } + }, + "mappings": { + "data": { + "dynamic_templates": [ + { + "text_fields": { + "path_match": "P:*.txtField", + "match_mapping_type": "*", + "mapping": { + "type": "text", + "copy_to": "text_copy", + "analyzer": "nfkc_cf_normalized", + "fields": { + "sort": { + "type": "icu_collation_keyword", + "index": false, + "ignore_above": 256 + }, + "keyword": { + "type": "keyword", + "ignore_above": 2000 + } + } + } + } + }, + { + "uri_fields": { + "path_match": "P:*.uriField", + "match_mapping_type": "*", + "mapping": { + "type": "text", + "copy_to": "text_copy", + "fields": { + "sort": { + "type": "icu_collation_keyword", + "index": false, + "ignore_above": 256 + }, + "keyword": { + "type": "keyword", + "ignore_above": 2000 + }, + "lowercase": { + "type": "text", + "analyzer": "uri_lowercase_with_stopwords" + } + } + } + } + }, + { + "page_fields_text": { + "path_match": "P:*.wpgField", + "match_mapping_type": "*", + "mapping": { + "type": "text", + "copy_to": "text_copy", + "analyzer": "nfkc_cf_normalized", + "fields": { + "sort": { + "type": "icu_collation_keyword", + "index": false, + "ignore_above": 256 + }, + "keyword": { + "type": "keyword", + "ignore_above": 500 + }, + "lowercase": { + "type": "keyword", + "analyzer": "nfkc_cf_normalized_lowercase" + } + } + } + } + }, + { + "page_fields_identifier": { + "path_match": "P:*.wpgID", + "match_mapping_type": "*", + "mapping": { + "type": "long" + } + } + }, + { + "numeric_fields": { + "path_match": "P:*.numField", + "match_mapping_type": "*", + "mapping": { + "type": "double", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + { + "date_fields": { + "path_match": "P:*.datField", + "match_mapping_type": "*", + "mapping": { + "type": "double", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + { + "date_fields_raw": { + "path_match": "P:*.dat_raw", + "match_mapping_type": "*", + "mapping": { + "type": "keyword" + } + } + }, + { + "geo_fields": { + "path_match": "P:*.geoField", + "match_mapping_type": "string", + "mapping": { + "type": "text", + "fields": { + "point": { + "type": "geo_point" + } + } + } + } + }, + { + "boolean_fields": { + "path_match": "P:*.booField", + "match_mapping_type": "boolean", + "mapping": { + "type": "boolean" + } + } + } + ], + "properties": { + "noop": { + "type": "integer" + }, + "text_copy": { + "type": "text", + "analyzer": "nfkc_cf_normalized", + "doc_values": false + }, + "text_raw": { + "type": "text", + "analyzer": "nfkc_cf_normalized", + "doc_values": false + }, + "subject.title": { + "type": "text", + "fields": { + "sort": { + "type": "icu_collation_keyword", + "index": false + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "subject.interwiki": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "subject.subobject": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "subject.sortkey": { + "type": "text", + "copy_to": "text_copy", + "fields": { + "sort": { + "type": "icu_collation_keyword", + "index": false + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + }, + "lowercase": { + "type": "keyword", + "analyzer": "nfkc_cf_normalized_lowercase" + } + } + } + } + } + } +}
\ No newline at end of file |