diff options
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/includes/export/SMW_Serializer_Turtle.php')
-rw-r--r-- | www/wiki/extensions/SemanticMediaWiki/includes/export/SMW_Serializer_Turtle.php | 294 |
1 files changed, 294 insertions, 0 deletions
<?php

use SMW\InMemoryPoolCache;

/**
 * File holding the SMWTurtleSerializer class that provides basic functions for
 * serialising OWL data in Turtle syntax.
 *
 * @ingroup SMW
 *
 * @author Markus Krötzsch
 */

/**
 * Class for serializing exported data (encoded as SMWExpData object) in
 * Turtle syntax.
 *
 * @ingroup SMW
 */
class SMWTurtleSerializer extends SMWSerializer {

	/**
	 * Array of non-trivial sub-SMWExpData elements that cannot be nested while
	 * serializing some SMWExpData. The elements of the array are serialized
	 * later during the same serialization step (so this is not like another
	 * queue for declarations or the like; it just unfolds an SMWExpData
	 * object).
	 *
	 * @var array of SMWExpData
	 */
	protected $subexpdata = [];

	/**
	 * If true, do not serialize namespace declarations and record them in
	 * $sparql_namespaces instead for later retrieval.
	 * @var boolean
	 */
	protected $sparqlmode;

	/**
	 * Array of retrieved namespaces (abbreviation => URI) for later use.
	 * @var array of string
	 */
	protected $sparql_namespaces = [];

	/**
	 * @param boolean $sparqlMode if true, namespace declarations are collected
	 *  in $sparql_namespaces instead of being written as "@prefix" lines
	 */
	public function __construct( $sparqlMode = false ) {
		parent::__construct();
		$this->sparqlmode = $sparqlMode;
	}

	/**
	 * Clear the parent's state and forget all collected SPARQL namespaces.
	 */
	public function clear() {
		parent::clear();
		$this->sparql_namespaces = [];
	}

	/**
	 * Reset the 'turtle.serializer' pool cache, which serializeNestedExpData()
	 * uses to remember which property descriptions were already serialized.
	 *
	 * @since 2.3
	 */
	public static function reset() {
		InMemoryPoolCache::getInstance()->resetPoolCacheById( 'turtle.serializer' );
	}

	/**
	 * Get an array of namespace prefixes used in SPARQL mode.
	 * Namespaces are not serialized among triples in SPARQL mode but are
	 * collected separately. This method returns the prefixes and empties
	 * the collected list afterwards.
	 *
	 * @return array shortName => namespace URI
	 */
	public function flushSparqlPrefixes() {
		$result = $this->sparql_namespaces;
		$this->sparql_namespaces = [];
		return $result;
	}

	/**
	 * Set up the default namespace declarations: as a prefix map in SPARQL
	 * mode, as "@prefix" lines in the pre-namespace buffer otherwise.
	 */
	protected function serializeHeader() {
		$exporter = SMWExporter::getInstance();

		if ( $this->sparqlmode ) {
			$this->pre_ns_buffer = '';
			$this->sparql_namespaces = [
				"rdf" => $exporter->expandURI( '&rdf;' ),
				"rdfs" => $exporter->expandURI( '&rdfs;' ),
				"owl" => $exporter->expandURI( '&owl;' ),
				"swivt" => $exporter->expandURI( '&swivt;' ),
				"wiki" => $exporter->expandURI( '&wiki;' ),
				"category" => $exporter->expandURI( '&category;' ),
				"property" => $exporter->expandURI( '&property;' ),
				"xsd" => "http://www.w3.org/2001/XMLSchema#",
				"wikiurl" => $exporter->expandURI( '&wikiurl;' )
			];
		} else {
			$this->pre_ns_buffer =
				"@prefix rdf: <" . $exporter->expandURI( '&rdf;' ) . "> .\n" .
				"@prefix rdfs: <" . $exporter->expandURI( '&rdfs;' ) . "> .\n" .
				"@prefix owl: <" . $exporter->expandURI( '&owl;' ) . "> .\n" .
				"@prefix swivt: <" . $exporter->expandURI( '&swivt;' ) . "> .\n" .
				// A note on "wiki": this namespace is crucial as a fallback when it would be illegal to start e.g. with a number.
				// In this case, one can always use wiki:... followed by "_" and possibly some namespace, since _ is legal as a first character.
				"@prefix wiki: <" . $exporter->expandURI( '&wiki;' ) . "> .\n" .
				"@prefix category: <" . $exporter->expandURI( '&category;' ) . "> .\n" .
				"@prefix property: <" . $exporter->expandURI( '&property;' ) . "> .\n" .
				// note that this XSD URI is hardcoded below (it is unlikely to change, of course)
				"@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n" .
				"@prefix wikiurl: <" . $exporter->expandURI( '&wikiurl;' ) . "> .\n";
		}

		$this->global_namespaces = [ 'rdf' => true, 'rdfs' => true, 'owl' => true, 'swivt' => true, 'wiki' => true, 'property' => true, 'category' => true ];
		$this->post_ns_buffer = "\n";
	}

	/**
	 * Append the closing "created by" comment; nothing is added in SPARQL mode.
	 */
	protected function serializeFooter() {
		if ( !$this->sparqlmode ) {
			$this->post_ns_buffer .= "\n# Created by Semantic MediaWiki, https://www.semantic-mediawiki.org/\n";
		}
	}

	/**
	 * Append an "rdf:type" triple declaring the given URI to be of the given type.
	 *
	 * @param $uri string URI (possibly abbreviated) that is passed through SMWExporter::expandURI()
	 * @param $typename string type name, written verbatim as the object of the triple
	 */
	public function serializeDeclaration( $uri, $typename ) {
		$this->post_ns_buffer .= "<" . SMWExporter::getInstance()->expandURI( $uri ) . "> rdf:type $typename .\n";
	}

	/**
	 * Serialize the given SMWExpData together with every sub-description
	 * that had to be deferred because Turtle cannot nest it, then serialize
	 * the namespaces.
	 *
	 * @param $expData SMWExpData containing the data to be serialised.
	 */
	public function serializeExpData( SMWExpData $expData ) {
		// The work list must be the declared $subexpdata property: this is the
		// property serializeNestedExpData() appends deferred descriptions to.
		// PHP property names are case-sensitive, so using a differently cased
		// name here would silently drop every deferred description.
		$this->subexpdata = [ $expData ];

		while ( count( $this->subexpdata ) > 0 ) {
			$this->serializeNestedExpData( array_pop( $this->subexpdata ), '' );
		}

		$this->serializeNamespaces();
	}

	/**
	 * Register a namespace as known and either record it (SPARQL mode) or
	 * write an "@prefix" line for it.
	 *
	 * @param $shortname string namespace abbreviation
	 * @param $uri string namespace URI
	 */
	protected function serializeNamespace( $shortname, $uri ) {
		$this->global_namespaces[$shortname] = true;
		if ( $this->sparqlmode ) {
			$this->sparql_namespaces[$shortname] = $uri;
		} else {
			$this->pre_ns_buffer .= "@prefix $shortname: <$uri> .\n";
		}
	}

	/**
	 * Serialize the given SMWExpData object, possibly recursively with
	 * increased indentation.
	 *
	 * @param $data SMWExpData containing the data to be serialised.
	 * @param $indent string specifying a prefix for indentation (usually a sequence of tabs)
	 */
	protected function serializeNestedExpData( SMWExpData $data, $indent ) {
		if ( count( $data->getProperties() ) == 0 ) {
			return; // nothing to export
		}

		$subject = $data->getSubject();

		// Avoid posting turtle property declarations already known for the
		// subject more than once
		if ( $subject->getDataItem() !== null && $subject->getDataItem()->getNamespace() === SMW_NS_PROPERTY ) {

			$hash = $data->getHash();
			$poolCache = InMemoryPoolCache::getInstance()->getPoolCacheById( 'turtle.serializer' );

			if ( $poolCache->contains( $hash ) && $poolCache->fetch( $hash ) ) {
				return;
			}

			// NOTE(review): when this description is deferred below, its hash has
			// already been stored here, so the queued copy is skipped by the
			// check above once it is popped -- confirm whether that is intended.
			$poolCache->save( $hash, true );
		}

		$this->recordDeclarationTypes( $data );

		$bnode = false;
		$this->post_ns_buffer .= $indent;
		if ( !$subject->isBlankNode() ) {
			$this->serializeExpResource( $subject );
		} else { // blank node
			$bnode = true;
			$this->post_ns_buffer .= "[";
		}

		if ( ( $indent !== '' ) && ( !$bnode ) ) {
			// Called to generate a nested description; but Turtle cannot nest
			// non-bnode descriptions. Only the resource name was written above;
			// the description itself is serialized later by serializeExpData().
			$this->subexpdata[] = $data;
			return;
		} elseif ( !$bnode ) {
			$this->post_ns_buffer .= "\n ";
		}

		$firstproperty = true;
		foreach ( $data->getProperties() as $property ) {
			$this->post_ns_buffer .= $firstproperty ? "\t" : " ;\n $indent\t";
			$firstproperty = false;
			$prop_decl_queued = false;
			$class_type_prop = $this->isOWLClassTypeProperty( $property );
			$this->serializeExpResource( $property );
			$firstvalue = true;

			foreach ( $data->getValues( $property ) as $value ) {
				$this->post_ns_buffer .= $firstvalue ? ' ' : ' , ';
				$firstvalue = false;

				if ( $value instanceof SMWExpLiteral ) {
					$prop_decl_type = SMW_SERIALIZER_DECL_APROP;
					$this->serializeExpLiteral( $value );
				} elseif ( $value instanceof SMWExpResource ) {
					$prop_decl_type = SMW_SERIALIZER_DECL_OPROP;
					$this->serializeExpResource( $value );
				} elseif ( $value instanceof SMWExpData ) { // resource (maybe blank node), could have subdescriptions
					$prop_decl_type = SMW_SERIALIZER_DECL_OPROP;
					$collection = $value->getCollection();
					if ( $collection !== false ) { // RDF-style collection (list)
						$this->post_ns_buffer .= "( ";
						foreach ( $collection as $subvalue ) {
							$this->serializeNestedExpData( $subvalue, $indent . "\t\t" );
							if ( $class_type_prop ) {
								$this->requireDeclaration( $subvalue->getSubject(), SMW_SERIALIZER_DECL_CLASS );
							}
						}
						$this->post_ns_buffer .= " )";
					} else {
						if ( $class_type_prop ) {
							$this->requireDeclaration( $value->getSubject(), SMW_SERIALIZER_DECL_CLASS );
						}
						if ( count( $value->getProperties() ) > 0 ) { // resource with data: serialise
							$this->post_ns_buffer .= "\n";
							$this->serializeNestedExpData( $value, $indent . "\t\t" );
						} else { // resource without data: may need to be queued
							$this->serializeExpResource( $value->getSubject() );
						}
					}
				}

				// The declaration is requested once per property, using the
				// type derived from the first value.
				if ( !$prop_decl_queued ) {
					$this->requireDeclaration( $property, $prop_decl_type );
					$prop_decl_queued = true;
				}
			}
		}
		$this->post_ns_buffer .= ( $bnode ? " ]" : " ." ) . ( $indent === '' ? "\n\n" : '' );
	}

	/**
	 * Append the Turtle form of a literal to the output buffer.
	 *
	 * @param $element SMWExpLiteral
	 */
	protected function serializeExpLiteral( SMWExpLiteral $element ) {
		$this->post_ns_buffer .= self::getTurtleNameForExpElement( $element );
	}

	/**
	 * Append the Turtle name of a resource to the output buffer, requiring
	 * its namespace first if the resource is an SMWExpNsResource.
	 *
	 * @param $element SMWExpResource
	 */
	protected function serializeExpResource( SMWExpResource $element ) {
		if ( $element instanceof SMWExpNsResource ) {
			$this->requireNamespace( $element->getNamespaceID(), $element->getNamespace() );
		}
		$this->post_ns_buffer .= self::getTurtleNameForExpElement( $element );
	}

	/**
	 * Get the Turtle serialization string for the given SMWExpElement. The
	 * method just computes a name, and does not serialize triples, so the
	 * parameter must be an SMWExpResource or SMWExpLiteral, no SMWExpData.
	 *
	 * @param $expElement SMWExpElement being SMWExpLiteral or SMWExpResource
	 * @return string
	 * @throws InvalidArgumentException if the element is neither a resource nor a literal
	 */
	public static function getTurtleNameForExpElement( SMWExpElement $expElement ) {
		if ( $expElement instanceof SMWExpResource ) {
			if ( $expElement->isBlankNode() ) {
				return '[]';
			} elseif ( ( $expElement instanceof SMWExpNsResource ) && ( $expElement->hasAllowedLocalName() ) ) {
				return $expElement->getQName();
			} else {
				return '<' . str_replace( '>', '\>', SMWExporter::getInstance()->expandURI( $expElement->getUri() ) ) . '>';
			}
		} elseif ( $expElement instanceof SMWExpLiteral ) {
			$dataType = $expElement->getDatatype();
			$lexicalForm = self::getCorrectLexicalForm( $expElement );

			if ( ( $dataType !== '' ) && ( $dataType != 'http://www.w3.org/2001/XMLSchema#string' ) ) {
				// Abbreviate XSD datatypes with the "xsd:" prefix; any other
				// datatype is written as a full IRI.
				$count = 0;
				$newdt = str_replace( 'http://www.w3.org/2001/XMLSchema#', 'xsd:', $dataType, $count );
				return ( $count == 1 ) ? "$lexicalForm^^$newdt" : "$lexicalForm^^<$dataType>";
			} else {
				return $lexicalForm;
			}
		} else {
			throw new InvalidArgumentException( 'The method can only serialize atomic elements of type SMWExpResource or SMWExpLiteral.' );
		}
	}

	/**
	 * Build the quoted (and, where applicable, language-tagged) lexical form
	 * of a literal.
	 *
	 * @param $expElement SMWExpLiteral
	 * @return string
	 */
	private static function getCorrectLexicalForm( $expElement ) {
		// Backslash must be replaced first so that the escapes added afterwards
		// are not escaped again. Carriage returns are escaped as well: like
		// line feeds and quotes, they may not appear raw in a quoted string.
		$lexicalForm = str_replace(
			[ '\\', "\n", "\r", '"' ],
			[ '\\\\', "\\n", "\\r", '\"' ],
			$expElement->getLexicalForm()
		);

		if ( $expElement->getLang() !== '' && ( $expElement->getDatatype() === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#langString' ) ) {
			// rdf:langString: the language tag is kept inside the quotes
			$lexicalForm = '"' . $lexicalForm . '@' . $expElement->getLang() . '"';
		} elseif ( $expElement->getLang() !== '' ) {
			$lexicalForm = '"' . $lexicalForm . '"' . '@' . $expElement->getLang();
		} else {
			$lexicalForm = '"' . $lexicalForm . '"';
		}

		return $lexicalForm;
	}

}