summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php234
1 files changed, 234 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php b/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php
new file mode 100644
index 00000000..24182051
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/src/SPARQLStore/QueryEngine/XmlResponseParser.php
@@ -0,0 +1,234 @@
+<?php
+
+namespace SMW\SPARQLStore\QueryEngine;
+
+use SMW\SPARQLStore\Exception\XmlParserException;
+use SMW\SPARQLStore\HttpResponseParser;
+use SMWExpLiteral as ExpLiteral;
+use SMWExpResource as ExpResource;
+
+/**
+ * Class for parsing SPARQL results in XML format
+ *
+ * @license GNU GPL v2+
+ * @since 1.6
+ *
+ * @author Markus Krötzsch
+ */
+class XmlResponseParser implements HttpResponseParser {
+
+ /**
+ * @var resource
+ */
+ private $parser;
+
+ /**
+ * Associative array mapping SPARQL variable names to column indices.
+ * @var array of integer
+ */
+ private $header;
+
+ /**
+ * List of result rows. Individual entries can be null if a cell in the
+ * SPARQL result table is empty (this is different from finding a blank
+ * node).
+ * @var array of array of (SMWExpElement or null)
+ */
+ private $data;
+
+ /**
+ * List of comment strings found in the XML file (without surrounding
+ * markup, i.e. the actual string only).
+ * @var array of string
+ */
+ private $comments;
+
+ /**
+ * Stack of open XML tags during parsing.
+ * @var array of string
+ */
+ private $xmlOpenTags;
+
+ /**
+ * Integer index of the column that the current result binding fills.
+ * @var integer
+ */
+ private $xmlBindIndex;
+
+ /**
+ * Datatype URI for the current literal, or empty if none.
+ * @var string
+ */
+ private $currentDataType;
+
+ /**
+ * @since 2.0
+ */
+ public function __construct() {
+ $this->parser = xml_parser_create();
+
+ xml_parser_set_option( $this->parser, XML_OPTION_SKIP_WHITE, 0 );
+ xml_parser_set_option( $this->parser, XML_OPTION_TARGET_ENCODING, 'UTF-8' );
+ xml_parser_set_option( $this->parser, XML_OPTION_CASE_FOLDING, 0 );
+ xml_set_object( $this->parser, $this );
+ xml_set_element_handler( $this->parser, 'handleOpenElement', 'handleCloseElement' );
+ xml_set_character_data_handler( $this->parser, 'handleCharacterData' );
+ xml_set_default_handler( $this->parser, 'handleDefault' );
+ //xml_set_start_namespace_decl_handler($parser, 'handleNsDeclaration' );
+ }
+
+ /**
+ * @since 2.0
+ */
+ public function __destruct() {
+ xml_parser_free( $this->parser );
+ }
+
+ /**
+ * Parse the given XML result and return an RepositoryResult for
+ * the contained data.
+ *
+ * @param string $response
+ *
+ * @return RepositoryResult
+ * @throws XmlParserException
+ */
+ public function parse( $response ) {
+
+ $this->xmlOpenTags = [];
+ $this->header = [];
+ $this->data = [];
+ $this->comments = [];
+
+ // Sesame can return "" result
+ if ( $response === '' ) {
+ $this->data = [ [ new ExpLiteral( 'false', 'http://www.w3.org/2001/XMLSchema#boolean' ) ] ];
+ }
+
+ // #626 Virtuoso
+ if ( $response == 'true' ) {
+ $this->data = [ [ new ExpLiteral( 'true', 'http://www.w3.org/2001/XMLSchema#boolean' ) ] ];
+ }
+
+ // #474 Virtuoso allows `false` to be a valid raw result
+ if ( $response === '' || $response == 'false' || $response == 'true' || is_bool( $response ) || $this->parseXml( $response ) ) {
+ return new RepositoryResult(
+ $this->header,
+ $this->data,
+ $this->comments
+ );
+ }
+
+ throw new XmlParserException(
+ $this->getLastError(),
+ $this->getLastLineNumber(),
+ $this->getLastColumnNumber()
+ );
+ }
+
+ private function parseXml( $xmlResultData ) {
+ return xml_parse( $this->parser, $xmlResultData, true );
+ }
+
+ private function getLastError() {
+ return xml_error_string( xml_get_error_code( $this->parser ) );
+ }
+
+ private function getLastLineNumber() {
+ return xml_get_current_line_number( $this->parser );
+ }
+
+ private function getLastColumnNumber() {
+ return xml_get_current_column_number ( $this->parser );
+ }
+
+ private function handleDefault( $parser, $data ) {
+ if ( substr( $data, 0, 4 ) == '<!--' ) {
+ $comment = substr( $data, 4, strlen( $data ) - 7 );
+ $this->comments[] = trim( $comment );
+ }
+ }
+
+ /**
+ * @see xml_set_element_handler
+ */
+ private function handleOpenElement( $parser, $elementTag, $attributes ) {
+
+ $this->currentDataType = '';
+
+ $prevTag = end( $this->xmlOpenTags );
+ $this->xmlOpenTags[] = $elementTag;
+
+ switch ( $elementTag ) {
+ case 'binding' && ( $prevTag == 'result' ):
+ if ( ( array_key_exists( 'name', $attributes ) ) &&
+ ( array_key_exists( $attributes['name'], $this->header ) ) ) {
+ $this->xmlBindIndex = $this->header[$attributes['name']];
+ }
+ break;
+ case 'result' && ( $prevTag == 'results' ):
+ $this->data[] = array_fill( 0, count( $this->header ), null );
+ break;
+ case 'literal' && ( $prevTag == 'binding' ):
+ if ( array_key_exists( 'datatype', $attributes ) ) {
+ $this->currentDataType = $attributes['datatype'];
+ }
+ /// TODO handle xml:lang attributes here as well?
+ break;
+ case 'variable' && ( $prevTag == 'head' ):
+ if ( array_key_exists( 'name', $attributes ) ) {
+ $this->header[$attributes['name']] = count( $this->header );
+ }
+ break;
+ }
+ }
+
+ /**
+ * @see xml_set_element_handler
+ */
+ private function handleCloseElement( $parser, $elementTag ) {
+ array_pop( $this->xmlOpenTags );
+ }
+
+ /**
+ * @see xml_set_character_data_handler
+ */
+ private function handleCharacterData( $parser, $characterData ) {
+
+ $prevTag = end( $this->xmlOpenTags );
+ $rowcount = count( $this->data ) - 1;
+
+ // UTF-8 is being split therefore concatenate the string (use row as indicator
+ // to detect a sliced string)
+ if ( isset( $this->data[$rowcount] ) && ( $element = end( $this->data[$rowcount] ) ) !== null ) {
+ switch ( $prevTag ) {
+ case 'uri':
+ $characterData = $element->getUri() . $characterData;
+ break;
+ case 'literal':
+ $characterData = $element->getLexicalForm() . $characterData;
+ }
+ }
+
+ switch ( $prevTag ) {
+ case 'uri':
+ $this->data[$rowcount][$this->xmlBindIndex] = new ExpResource( $characterData );
+ break;
+ case 'literal':
+ $this->data[$rowcount][$this->xmlBindIndex] = new ExpLiteral( $characterData, $this->currentDataType );
+ break;
+ case 'bnode':
+ $this->data[$rowcount][$this->xmlBindIndex] = new ExpResource( '_' . $characterData );
+ break;
+ case 'boolean':
+ // no "results" in this case
+ $literal = new ExpLiteral( $characterData, 'http://www.w3.org/2001/XMLSchema#boolean' );
+
+ // ?? Really !!
+ $this->data = [ [ $literal ] ];
+ $this->header = [ '' => 0 ];
+ break;
+ }
+ }
+
+}