diff options
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/RepositoryConnectors/GenericRepositoryConnector.php')
-rw-r--r-- | www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/RepositoryConnectors/GenericRepositoryConnector.php | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/RepositoryConnectors/GenericRepositoryConnector.php b/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/RepositoryConnectors/GenericRepositoryConnector.php new file mode 100644 index 00000000..76361a4c --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/RepositoryConnectors/GenericRepositoryConnector.php @@ -0,0 +1,594 @@ +<?php + +namespace SMW\SPARQLStore\RepositoryConnectors; + +use Onoi\HttpRequest\HttpRequest; +use SMW\SPARQLStore\Exception\BadHttpEndpointResponseException; +use SMW\SPARQLStore\HttpResponseErrorMapper; +use SMW\SPARQLStore\QueryEngine\RepositoryResult; +use SMW\SPARQLStore\QueryEngine\XmlResponseParser; +use SMW\SPARQLStore\RepositoryClient; +use SMW\SPARQLStore\RepositoryConnection; +use SMWExporter as Exporter; + +/** + * Basic database connector for exchanging data via SPARQL. + * + * @license GNU GPL v2+ + * @since 1.6 + * + * @author Markus Krötzsch + */ +class GenericRepositoryConnector implements RepositoryConnection { + + /** + * Flag denoting endpoints being capable of querying + */ + const ENDP_QUERY = 1; + + /** + * Flag denoting endpoints being capable of updating + */ + const ENDP_UPDATE = 2; + + /** + * Flag denoting endpoints being capable of SPARQL HTTP graph management + */ + const ENDP_DATA = 4; + + /** + * @var RepositoryClient + */ + protected $repositoryClient; + + /** + * @note Handles the curl handle and is reused throughout the instance to + * safe some initialization effort + * + * @var HttpRequest + */ + protected $httpRequest; + + /** + * @var HttpResponseErrorMapper + */ + private $badHttpResponseMapper; + + /** + * @note It is suggested to use the RepositoryConnectionProvider to create + * a valid instance + * + * @since 2.2 + * + * @param RepositoryClient $repositoryClient + * @param HttpRequest $httpRequest + */ + public function __construct( RepositoryClient $repositoryClient, HttpRequest $httpRequest ) { + $this->repositoryClient = $repositoryClient; + $this->httpRequest = $httpRequest; + + $this->httpRequest->setOption( CURLOPT_FORBID_REUSE, false ); + $this->httpRequest->setOption( CURLOPT_FRESH_CONNECT, false ); + $this->httpRequest->setOption( CURLOPT_RETURNTRANSFER, true ); // put result into variable + $this->httpRequest->setOption( CURLOPT_FAILONERROR, true ); + + $this->setConnectionTimeout( 10 ); + } + + /** + * @since 2.5 + * + * @return RepositoryClient + */ + public function getRepositoryClient() { + return $this->repositoryClient; + } + + /** + * Get the URI of the default graph that this database connector is + * using, or the empty string if none is used (no graph related + * statements in queries/updates). + * + * @return string graph UIR or empty + */ + public function getDefaultGraph() { + return $this->repositoryClient->getDefaultGraph(); + } + + /** + * @since 2.0 + * + * @param integer $timeout in seconds + */ + public function setConnectionTimeout( $timeout = 10 ) { + $this->httpRequest->setOption( CURLOPT_CONNECTTIMEOUT, $timeout ); + } + + /** + * Check if the database can be contacted. + * + * @todo SPARQL endpoints sometimes return errors if no (valid) query + * is posted. The current implementation tries to catch this, but this + * might not be entirely correct. Especially, the SPARQL 1.1 HTTP error + * codes for Update are not defined yet (April 15 2011). + * + * @param $pingQueryEndpoint boolean true if the query endpoint should be + * pinged, false if the update endpoint should be pinged + * + * @return boolean to indicate success + */ + public function ping( $endpointType = self::ENDP_QUERY ) { + if ( $endpointType == self::ENDP_QUERY ) { + $this->httpRequest->setOption( CURLOPT_URL, $this->repositoryClient->getQueryEndpoint() ); + $this->httpRequest->setOption( CURLOPT_NOBODY, true ); + $this->httpRequest->setOption( CURLOPT_POST, true ); + } elseif ( $endpointType == self::ENDP_UPDATE ) { + + if ( $this->repositoryClient->getUpdateEndpoint() === '' ) { + return false; + } + + $this->httpRequest->setOption( CURLOPT_URL, $this->repositoryClient->getUpdateEndpoint() ); + + // 4Store gives 404 instead of 500 with CURLOPT_NOBODY + $this->httpRequest->setOption( CURLOPT_NOBODY, false ); + + } else { // ( $endpointType == self::ENDP_DATA ) + + if ( $this->repositoryClient->getDataEndpoint() === '' ) { + return false; + } + + // try an empty POST + return $this->doHttpPost( '' ); + } + + $this->httpRequest->execute(); + + if ( $this->httpRequest->getLastErrorCode() == 0 ) { + return true; + } + + // Valid HTTP responses from a complaining SPARQL endpoint that is + // alive and kicking + $httpCode = $this->httpRequest->getLastTransferInfo( CURLINFO_HTTP_CODE ); + + return ( ( $httpCode == 500 ) || ( $httpCode == 400 ) ); + } + + /** + * SELECT wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $vars mixed array or string, field name(s) to be retrieved, can be '*' + * @param $where string WHERE part of the query, without surrounding { } + * @param $options array (associative) of options, e.g. array( 'LIMIT' => '10' ) + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return RepositoryResult + */ + public function select( $vars, $where, $options = [], $extraNamespaces = [] ) { + return $this->doQuery( $this->getSparqlForSelect( $vars, $where, $options, $extraNamespaces ) ); + } + + /** + * Build the SPARQL query that is used by GenericHttpDatabaseConnector::select(). + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $where string WHERE part of the query, without surrounding { } + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return string SPARQL query + */ + public function getSparqlForSelect( $vars, $where, $options = [], $extraNamespaces = [] ) { + + $sparql = self::getPrefixString( $extraNamespaces ) . 'SELECT '; + + if ( array_key_exists( 'DISTINCT', $options ) ) { + $sparql .= 'DISTINCT '; + } + + if ( is_array( $vars ) ) { + $sparql .= implode( ',', $vars ); + } else { + $sparql .= $vars; + } + + $sparql .= " WHERE {\n" . $where . "\n}"; + + if ( array_key_exists( 'ORDER BY', $options ) ) { + $sparql .= "\nORDER BY " . $options['ORDER BY']; + } + + if ( array_key_exists( 'OFFSET', $options ) ) { + $sparql .= "\nOFFSET " . $options['OFFSET']; + } + + if ( array_key_exists( 'LIMIT', $options ) ) { + $sparql .= "\nLIMIT " . $options['LIMIT']; + } + + return $sparql; + } + + /** + * ASK wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $where string WHERE part of the query, without surrounding { } + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return RepositoryResult + */ + public function ask( $where, $extraNamespaces = [] ) { + return $this->doQuery( $this->getSparqlForAsk( $where, $extraNamespaces ) ); + } + + /** + * Build the SPARQL query that is used by GenericHttpDatabaseConnector::ask(). + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $where string WHERE part of the query, without surrounding { } + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return string SPARQL query + */ + public function getSparqlForAsk( $where, $extraNamespaces = [] ) { + return self::getPrefixString( $extraNamespaces ) . "ASK {\n" . $where . "\n}"; + } + + /** + * SELECT wrapper for counting results. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $variable string variable name or '*' + * @param $where string WHERE part of the query, without surrounding { } + * @param $options array (associative) of options, e.g. array('LIMIT' => '10') + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return RepositoryResult + */ + public function selectCount( $variable, $where, $options = [], $extraNamespaces = [] ) { + + $sparql = self::getPrefixString( $extraNamespaces ) . 'SELECT (COUNT('; + + if ( array_key_exists( 'DISTINCT', $options ) ) { + $sparql .= 'DISTINCT '; + } + + $sparql .= $variable . ") AS ?count) WHERE {\n" . $where . "\n}"; + + if ( array_key_exists( 'OFFSET', $options ) ) { + $sparql .= "\nOFFSET " . $options['OFFSET']; + } + + if ( array_key_exists( 'LIMIT', $options ) ) { + $sparql .= "\nLIMIT " . $options['LIMIT']; + } + + return $this->doQuery( $sparql ); + } + + /** + * DELETE wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $deletePattern string CONSTRUCT pattern of tripples to delete + * @param $where string condition for data to delete + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return boolean stating whether the operations succeeded + */ + public function delete( $deletePattern, $where, $extraNamespaces = [] ) { + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $sparql = self::getPrefixString( $extraNamespaces ) . + ( ( $defaultGraph !== '' )? "WITH <{$defaultGraph}> " : '' ) . + "DELETE { $deletePattern } WHERE { $where }"; + + return $this->doUpdate( $sparql ); + } + + /** + * Convenience method for deleting all triples that have a subject that + * occurs in a triple with the given property and object. This is used + * in SMW to delete subobjects with all their data. Some RDF stores fail + * on complex delete queries, hence a wrapper function is provided to + * allow more pedestrian implementations. + * + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $propertyName string Turtle name of marking property + * @param $objectName string Turtle name of marking object/value + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return boolean stating whether the operations succeeded + */ + public function deleteContentByValue( $propertyName, $objectName, $extraNamespaces = [] ) { + return $this->delete( "?s ?p ?o", "?s $propertyName $objectName . ?s ?p ?o", $extraNamespaces ); + } + + /** + * Convenience method for deleting all triples of the entire store + * + * @return boolean + */ + public function deleteAll() { + return $this->delete( "?s ?p ?o", "?s ?p ?o" ); + } + + /** + * INSERT DELETE wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $insertPattern string CONSTRUCT pattern of tripples to insert + * @param $deletePattern string CONSTRUCT pattern of tripples to delete + * @param $where string condition for data to delete + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return boolean stating whether the operations succeeded + */ + public function insertDelete( $insertPattern, $deletePattern, $where, $extraNamespaces = [] ) { + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $sparql = self::getPrefixString( $extraNamespaces ) . + ( ( $defaultGraph !== '' )? "WITH <{$defaultGraph}> " : '' ) . + "DELETE { $deletePattern } INSERT { $insertPattern } WHERE { $where }"; + + return $this->doUpdate( $sparql ); + } + + /** + * INSERT DATA wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $triples string of triples to insert + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return boolean stating whether the operations succeeded + */ + public function insertData( $triples, $extraNamespaces = [] ) { + + if ( $this->repositoryClient->getDataEndpoint() !== '' ) { + $turtle = self::getPrefixString( $extraNamespaces, false ) . $triples; + return $this->doHttpPost( $turtle ); + } + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $sparql = self::getPrefixString( $extraNamespaces, true ) . + "INSERT DATA " . + ( ( $defaultGraph !== '' )? " { GRAPH <{$defaultGraph}> " : '' ) . + "{ $triples } " . + ( ( $defaultGraph !== '' )? " } " : '' ); + + return $this->doUpdate( $sparql ); + } + + /** + * DELETE DATA wrapper. + * The function declares the standard namespaces wiki, swivt, rdf, owl, + * rdfs, property, xsd, so these do not have to be included in + * $extraNamespaces. + * + * @param $triples string of triples to delete + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * + * @return boolean stating whether the operations succeeded + */ + public function deleteData( $triples, $extraNamespaces = [] ) { + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $sparql = self::getPrefixString( $extraNamespaces ) . + "DELETE DATA { " . + ( ( $defaultGraph !== '' )? "GRAPH <{$defaultGraph}> " : '' ) . + "{ $triples } }"; + + return $this->doUpdate( $sparql ); + } + + + /** + * Execute a SPARQL query and return an RepositoryResult object + * that contains the results. The method throws exceptions based on + * GenericHttpDatabaseConnector::mapHttpRequestError(). If errors occur and this + * method does not throw anything, then an empty result with an error + * code is returned. + * + * @note This function sets the graph that is to be used as part of the + * request. Queries should not include additional graph information. + * + * @param $sparql string with the complete SPARQL query (SELECT or ASK) + * + * @return RepositoryResult + */ + public function doQuery( $sparql ) { + + if ( $this->repositoryClient->getQueryEndpoint() === '' ) { + throw new BadHttpEndpointResponseException( BadHttpEndpointResponseException::ERROR_NOSERVICE, $sparql, 'not specified' ); + } + + $this->httpRequest->setOption( CURLOPT_URL, $this->repositoryClient->getQueryEndpoint() ); + + $this->httpRequest->setOption( CURLOPT_HTTPHEADER, [ + 'Accept: application/sparql-results+xml,application/xml;q=0.8', + 'Content-Type: application/x-www-form-urlencoded;charset=UTF-8' + ] ); + + $this->httpRequest->setOption( CURLOPT_POST, true ); + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $parameterString = "query=" . urlencode( $sparql ) . + ( ( $defaultGraph !== '' )? '&default-graph-uri=' . urlencode( $defaultGraph ) : '' ); + + $this->httpRequest->setOption( CURLOPT_POSTFIELDS, $parameterString ); + + $httpResponse = $this->httpRequest->execute(); + + if ( $this->httpRequest->getLastErrorCode() == 0 ) { + $xmlResponseParser = new XmlResponseParser(); + return $xmlResponseParser->parse( $httpResponse ); + } + + $this->mapHttpRequestError( $this->repositoryClient->getQueryEndpoint(), $sparql ); + + $repositoryResult = new RepositoryResult(); + $repositoryResult->setErrorCode( RepositoryResult::ERROR_UNREACHABLE ); + + return $repositoryResult; + } + + /** + * Execute a SPARQL update and return a boolean to indicate if the + * operations was successful. The method throws exceptions based on + * GenericHttpDatabaseConnector::mapHttpRequestError(). If errors occur and this + * method does not throw anything, then false is returned. + * + * @note When this is written, it is not clear if the update protocol + * supports a default-graph-uri parameter. Hence the target graph for + * all updates is generally encoded in the query string and not fixed + * when sending the query. Direct callers to this function must include + * the graph information in the queries that they build. + * + * @param $sparql string with the complete SPARQL update query (INSERT or DELETE) + * + * @return boolean + */ + public function doUpdate( $sparql ) { + + if ( $this->repositoryClient->getUpdateEndpoint() === '' ) { + throw new BadHttpEndpointResponseException( BadHttpEndpointResponseException::ERROR_NOSERVICE, $sparql, 'not specified' ); + } + + $this->httpRequest->setOption( CURLOPT_URL, $this->repositoryClient->getUpdateEndpoint() ); + $this->httpRequest->setOption( CURLOPT_POST, true ); + + $parameterString = "update=" . urlencode( $sparql ); + + $this->httpRequest->setOption( CURLOPT_POSTFIELDS, $parameterString ); + $this->httpRequest->setOption( CURLOPT_HTTPHEADER, [ 'Content-Type: application/x-www-form-urlencoded;charset=UTF-8' ] ); + + $this->httpRequest->execute(); + + if ( $this->httpRequest->getLastErrorCode() == 0 ) { + return true; + } + + $this->mapHttpRequestError( $this->repositoryClient->getUpdateEndpoint(), $sparql ); + return false; + } + + /** + * Execute a HTTP-based SPARQL POST request according to + * http://www.w3.org/2009/sparql/docs/http-rdf-update/. + * The method throws exceptions based on + * GenericHttpDatabaseConnector::mapHttpRequestError(). If errors occur and this + * method does not throw anything, then an empty result with an error + * code is returned. + * + * @note This protocol is not part of the SPARQL standard and may not + * be supported by all stores. To avoid using it, simply do not provide + * a data endpoint URL when configuring the SPARQL database. If used, + * the protocol might lead to a better performance since there is less + * parsing required to fetch the data from the request. + * @note Some stores (e.g. 4Store) support another mode of posting data + * that may be implemented in a special database handler. + * + * @param $payload string Turtle serialization of data to send + * + * @return boolean + */ + public function doHttpPost( $payload ) { + + if ( $this->repositoryClient->getDataEndpoint() === '' ) { + throw new BadHttpEndpointResponseException( BadHttpEndpointResponseException::ERROR_NOSERVICE, "SPARQL POST with data: $payload", 'not specified' ); + } + + $defaultGraph = $this->repositoryClient->getDefaultGraph(); + + $this->httpRequest->setOption( CURLOPT_URL, $this->repositoryClient->getDataEndpoint() . + ( ( $defaultGraph !== '' )? '?graph=' . urlencode( $defaultGraph ) : '?default' ) ); + $this->httpRequest->setOption( CURLOPT_POST, true ); + + // POST as file (fails in 4Store) + $payloadFile = tmpfile(); + fwrite( $payloadFile, $payload ); + fseek( $payloadFile, 0 ); + + $this->httpRequest->setOption( CURLOPT_INFILE, $payloadFile ); + $this->httpRequest->setOption( CURLOPT_INFILESIZE, strlen( $payload ) ); + $this->httpRequest->setOption( CURLOPT_HTTPHEADER, [ 'Content-Type: application/x-turtle' ] ); + + $this->httpRequest->execute(); + + if ( $this->httpRequest->getLastErrorCode() == 0 ) { + return true; + } + + // TODO The error reporting based on SPARQL (Update) is not adequate for the HTTP POST protocol + $this->mapHttpRequestError( $this->repositoryClient->getDataEndpoint(), $payload ); + return false; + } + + /** + * Create the standard PREFIX declarations for SPARQL or Turtle, + * possibly with additional namespaces involved. + * + * @param $extraNamespaces array (associative) of namespaceId => namespaceUri + * @param $forSparql boolean true to use SPARQL prefix syntax, false to use Turtle prefix syntax + * + * @return string + */ + public static function getPrefixString( $extraNamespaces = [], $forSparql = true ) { + $prefixString = ''; + $prefixIntro = $forSparql ? 'PREFIX ' : '@prefix '; + $prefixOutro = $forSparql ? "\n" : " .\n"; + + foreach ( [ 'wiki', 'rdf', 'rdfs', 'owl', 'swivt', 'property', 'xsd' ] as $shortname ) { + $prefixString .= "{$prefixIntro}{$shortname}: <" . Exporter::getInstance()->getNamespaceUri( $shortname ) . ">$prefixOutro"; + unset( $extraNamespaces[$shortname] ); // avoid double declaration + } + + foreach ( $extraNamespaces as $shortname => $uri ) { + $prefixString .= "{$prefixIntro}{$shortname}: <$uri>$prefixOutro"; + } + + return $prefixString; + } + + /** + * @param $endpoint string URL of endpoint that was used + * @param $sparql string query that caused the problem + */ + protected function mapHttpRequestError( $endpoint, $sparql ) { + + if ( $this->badHttpResponseMapper === null ) { + $this->badHttpResponseMapper = new HttpResponseErrorMapper( $this->httpRequest ); + } + + $this->badHttpResponseMapper->mapErrorResponse( $endpoint, $sparql ); + } + +} |