summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php')
-rw-r--r--www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php188
1 files changed, 188 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php
new file mode 100644
index 00000000..5a411057
--- /dev/null
+++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php
@@ -0,0 +1,188 @@
+<?php
+
+namespace SMW\Maintenance;
+
+use SMWExportController as ExportController;
+use SMWRDFXMLSerializer as RDFXMLSerializer;
+
+$basePath = getenv( 'MW_INSTALL_PATH' ); // environment override for the wiki root, false when unset
+$basePath = $basePath !== false ? $basePath : __DIR__ . '/../../..'; // default: three levels above this script
+require_once $basePath . '/maintenance/Maintenance.php';
+
+/**
+ * Usage:
+ * php dumpRDF.php [options...]
+ *
+ * --file (-o) <file> Export everything to given output file, stdout is used if omitted;
+ * file output is generally better and strongly recommended for large wikis
+ * --categories Export only categories
+ * --concepts Export only concepts
+ * --classes Export only concepts and categories
+ * --properties Export only properties
+ * --types Export only types
+ * --individuals Export only pages that are not categories, properties, or types
+ * --page <pagelist> Export only pages included in the <pagelist> with | being used as a separator.
+ * Example: --page "Page 1|Page 2". The options -e, --file and -d are ignored if --page is given.
+ * -d <delay> Slows down the export in order to stress the server less,
+ * sleeping for <delay> milliseconds every now and then
+ * -e <each> After how many exported entities should the process take a nap?
+ * --server=<server> The protocol and server name to use as base URLs, e.g.
+ * https://en.wikipedia.org. This is sometimes necessary because
+ * server name detection may fail in command line scripts.
+ *
+ * @ingroup SMWMaintenance
+ *
+ * @license GNU GPL v2+
+ * @since 2.0
+ *
+ * @author Markus Krötzsch
+ * @author mwjames
+ */
+class DumpRdf extends \Maintenance {
+	/** @var integer Pause per nap: the -d value (milliseconds) multiplied by 1000 */
+	private $delay = 0;
+
+	/** @var integer Number of exported entities between two naps (-e) */
+	private $delayeach = 0;
+
+	/** @var boolean|integer|array Namespace restriction passed to the export controller */
+	private $restrictNamespaceTo = false;
+
+	/** @var array Page names given with --page */
+	private $pages = [];
+
+	/**
+	 * Sets the script description and registers the command line options.
+	 *
+	 * @since 2.0
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->addDescription( "\n" ."Complete RDF export of existing triples. \n" );
+		// NOTE(review): the parent constructor may already call this; confirm.
+		$this->addDefaultParams();
+	}
+
+	/**
+	 * Registers the options of this script on top of the parent's defaults.
+	 *
+	 * @see Maintenance::addDefaultParams
+	 * @since 2.0
+	 */
+	protected function addDefaultParams() {
+		parent::addDefaultParams();
+
+		$this->addOption( 'd', '<delay> Wait for this many milliseconds after processing, useful for limiting server load.', false, true );
+		$this->addOption( 'e', '<each> after how many exported entities should the process take a nap.', false, true );
+		$this->addOption( 'file', '<file> output file.', false, true, 'o' );
+
+		foreach ( [ 'categories', 'concepts', 'classes', 'properties', 'types', 'individuals' ] as $flag ) {
+			$this->addOption( $flag, 'Export only ' . $flag, false );
+		}
+
+		$this->addOption( 'page', 'Export only pages included in the <pagelist> with | being used as a separator. ' .
+			'Example: --page "Page 1|Page 2", -e, -file, -d are ignored if --page is given.', false, true );
+
+		$this->addOption( 'server', '<server> The protocol and server name to as base URLs, e.g. http://en.wikipedia.org. ' .
+			'This is sometimes necessary because server name detection may fail in command line scripts.', false, true );
+
+		$this->addOption( 'quiet', 'Do not give any output', false, false, 'q' );
+	}
+
+	/**
+	 * Reads the options and runs the export.
+	 *
+	 * @see Maintenance::execute
+	 * @since 2.0
+	 * @return boolean
+	 */
+	public function execute() {
+		if ( !defined( 'SMW_VERSION' ) ) {
+			$this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" );
+			// A missing SMW is a failure, so do not exit with the success status.
+			exit( 1 );
+		}
+
+		$this->setParameters();
+
+		// Announce the target only when a file is really written (--page ignores --file);
+		// otherwise the message would be misleading and get mixed into the printed dump.
+		if ( $this->pages === [] && $this->hasOption( 'file' ) ) {
+			$this->reportMessage( "\nWriting OWL/RDF dump to " . $this->getOption( 'file' ) . " ...\n" );
+		}
+
+		$this->exportRdfToOutputChannel();
+
+		return true;
+	}
+
+	/**
+	 * Passes a status message on to the script output.
+	 *
+	 * @since 2.0
+	 * @param string $message
+	 */
+	public function reportMessage( $message ) {
+		$this->output( $message );
+	}
+
+	/**
+	 * Transfers the command line options into the object state.
+	 *
+	 * @return DumpRdf
+	 */
+	private function setParameters() {
+		if ( $this->hasOption( 'd' ) ) {
+			// Negative values are clamped to 0, i.e. no delay at all.
+			$this->delay = max( 0, intval( $this->getOption( 'd' ) ) ) * 1000;
+		}
+
+		// Default: nap after every entity whenever a delay was requested.
+		$this->delayeach = ( $this->delay === 0 ) ? 0 : 1;
+
+		if ( $this->hasOption( 'e' ) ) {
+			$this->delayeach = intval( $this->getOption( 'e' ) );
+		}
+
+		// NOTE(review): --types is registered but not handled here; confirm intent.
+		if ( $this->hasOption( 'categories' ) ) {
+			$this->restrictNamespaceTo = NS_CATEGORY;
+		} elseif ( $this->hasOption( 'concepts' ) ) {
+			$this->restrictNamespaceTo = SMW_NS_CONCEPT;
+		} elseif ( $this->hasOption( 'classes' ) ) {
+			$this->restrictNamespaceTo = [ NS_CATEGORY, SMW_NS_CONCEPT ];
+		} elseif ( $this->hasOption( 'properties' ) ) {
+			$this->restrictNamespaceTo = SMW_NS_PROPERTY;
+		} elseif ( $this->hasOption( 'individuals' ) ) {
+			$this->restrictNamespaceTo = -1;
+		}
+
+		if ( $this->hasOption( 'page' ) ) {
+			$this->pages = explode( '|', $this->getOption( 'page' ) );
+		}
+
+		if ( $this->hasOption( 'server' ) ) {
+			$GLOBALS['wgServer'] = $this->getOption( 'server' );
+		}
+
+		return $this;
+	}
+
+	/**
+	 * Runs the export variant that matches the given options.
+	 */
+	private function exportRdfToOutputChannel() {
+		$exportController = new ExportController( new RDFXMLSerializer() );
+
+		if ( $this->pages !== [] ) {
+			$exportController->printPages( $this->pages );
+		} elseif ( $this->hasOption( 'file' ) ) {
+			$exportController->printAllToFile( $this->getOption( 'file' ), $this->restrictNamespaceTo, $this->delay, $this->delayeach );
+		} else {
+			$exportController->printAllToOutput( $this->restrictNamespaceTo, $this->delay, $this->delayeach );
+		}
+	}
+}
+
+$maintClass = 'SMW\Maintenance\DumpRdf'; // presumably read by the runner included below; verify against MediaWiki
+require_once RUN_MAINTENANCE_IF_MAIN;