summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php
blob: 5a411057b16c24a3d5765b92c77f0efd38a67b1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
<?php

namespace SMW\Maintenance;

use SMWExportController as ExportController;
use SMWRDFXMLSerializer as RDFXMLSerializer;

// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to three directories above
// this script.
$basePath = getenv( 'MW_INSTALL_PATH' );

if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';

/**
 * Usage:
 * php dumpRDF.php [options...]
 *
 * --file (-o) <file> Export everything to given output file, stdout is used if omitted;
 *                    file output is generally better and strongly recommended for large wikis
 * --categories       Export only categories
 * --concepts         Export only concepts
 * --classes          Export only concepts and categories
 * --properties       Export only properties
 * --types            Export only types
 * --individuals      Export only pages that are no categories, properties, or types
 * --page <pagelist>  Export only pages included in the <pagelist> with | being used as a separator.
 *                    Example: --page "Page 1|Page 2". The options -e, --file and -d are ignored if --page is given.
 * -d <delay>         Slows down the export in order to stress the server less,
 *                    sleeping for <delay> milliseconds every now and then
 * -e <each>          After how many exported entities should the process take a nap?
 * --server=<server>  The protocol and server name to use as base URLs, e.g.
 *                    https://en.wikipedia.org. This is sometimes necessary because
 *                    server name detection may fail in command line scripts.
 *
 * @ingroup SMWMaintenance
 *
 * @license GNU GPL v2+
 * @since 2.0
 *
 * @author Markus Krötzsch
 * @author mwjames
 */
class DumpRdf extends \Maintenance {

	/**
	 * Value of the -d option multiplied by 1000, never negative. The option is
	 * documented as milliseconds, so this is presumably microseconds as expected
	 * by the export controller (TODO confirm against ExportController).
	 *
	 * @var integer
	 */
	private $delay = 0;

	/**
	 * Value of the -e option; defaults to 1 when a delay is set and to 0 otherwise.
	 *
	 * @var integer
	 */
	private $delayeach = 0;

	/**
	 * Namespace restriction handed to the export controller: false when no
	 * restricting option was given, a single namespace constant, a list of
	 * namespace constants, or -1 for --individuals.
	 *
	 * @var boolean|integer|array
	 */
	private $restrictNamespaceTo = false;

	/**
	 * Page names taken from the --page option (split on "|").
	 *
	 * @var array
	 */
	private $pages = [];

	/**
	 * @since 2.0
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( "\n" ."Complete RDF export of existing triples. \n" );
		$this->addDefaultParams();
	}

	/**
	 * Registers the command line options understood by this script.
	 *
	 * @see Maintenance::addDefaultParams
	 *
	 * @since 2.0
	 */
	protected function addDefaultParams() {

		parent::addDefaultParams();

		$this->addOption( 'd', '<delay> Wait for this many milliseconds after processing, useful for limiting server load.', false, true );
		$this->addOption( 'e', '<each> after how many exported entities should the process take a nap.', false, true );
		$this->addOption( 'file', '<file> output file.', false, true, 'o' );

		$this->addOption( 'categories', 'Export only categories', false );
		$this->addOption( 'concepts', 'Export only concepts', false );
		$this->addOption( 'classes', 'Export only classes', false );
		$this->addOption( 'properties', 'Export only properties', false );
		$this->addOption( 'types', 'Export only types', false );
		$this->addOption( 'individuals', 'Export only individuals', false );

		$this->addOption( 'page', 'Export only pages included in the <pagelist> with | being used as a separator. ' .
								'Example: --page "Page 1|Page 2", -e, -file, -d are ignored if --page is given.', false, true );

		$this->addOption( 'server', '<server> The protocol and server name to as base URLs, e.g. http://en.wikipedia.org. ' .
								'This is sometimes necessary because server name detection may fail in command line scripts.', false, true );

		$this->addOption( 'quiet', 'Do not give any output', false, false, 'q' );
	}

	/**
	 * Reads the command line options and runs the export.
	 *
	 * @see Maintenance::execute
	 *
	 * @since 2.0
	 *
	 * @return boolean
	 */
	public function execute() {

		if ( !defined( 'SMW_VERSION' ) ) {
			// Report through the error channel and leave with a non-zero status
			// so that callers (and -q runs) can detect the failure.
			$this->error( "You need to have SMW enabled in order to use this maintenance script!\n\n" );
			exit( 1 );
		}

		// Options have to be read first: whether the dump goes to a file
		// depends on --page as well as on --file.
		$this->setParameters();

		// Only announce the target when the dump is really written to a file.
		// In every other case the dump itself is printed, and a status line
		// emitted through output() would end up in front of the RDF/XML.
		if ( $this->pages === [] && $this->hasOption( 'file' ) ) {
			$this->reportMessage( "\nWriting OWL/RDF dump to " . $this->getOption( 'file' ) . " ...\n" );
		}

		$this->exportRdfToOutputChannel();

		return true;
	}

	/**
	 * Forwards a message to Maintenance::output.
	 *
	 * @see Maintenance::reportMessage
	 *
	 * @since 2.0
	 *
	 * @param string $message
	 */
	public function reportMessage( $message ) {
		$this->output( $message );
	}

	/**
	 * Transfers the command line options into the object state.
	 *
	 * @return DumpRdf
	 */
	private function setParameters() {

		if ( $this->hasOption( 'd' ) ) {
			// A negative delay is meaningless; treat it like "no delay".
			$this->delay = max( 0, intval( $this->getOption( 'd' ) ) ) * 1000;
		}

		// Without an explicit -e, pause after every entity whenever a delay is set.
		$this->delayeach = ( $this->delay === 0 ) ? 0 : 1;

		if ( $this->hasOption( 'e' ) ) {
			$this->delayeach = intval( $this->getOption( 'e' )  );
		}

		// The restricting options are mutually exclusive; the first match wins.
		// NOTE(review): --types is registered in addDefaultParams() but has no
		// branch here, so it currently does not restrict the export — confirm
		// whether that is intended.
		if ( $this->hasOption( 'categories' ) ) {
			$this->restrictNamespaceTo = NS_CATEGORY;
		} elseif ( $this->hasOption( 'concepts' ) ) {
			$this->restrictNamespaceTo = SMW_NS_CONCEPT;
		} elseif ( $this->hasOption( 'classes' ) ) {
			$this->restrictNamespaceTo = [ NS_CATEGORY, SMW_NS_CONCEPT ];
		} elseif ( $this->hasOption( 'properties' ) ) {
			$this->restrictNamespaceTo = SMW_NS_PROPERTY;
		} elseif ( $this->hasOption( 'individuals' ) ) {
			$this->restrictNamespaceTo = - 1;
		}

		if ( $this->hasOption( 'page' ) ) {
			$this->pages = explode( '|', $this->getOption( 'page' ) );
		}

		if ( $this->hasOption( 'server' ) ) {
			$GLOBALS['wgServer'] = $this->getOption( 'server' );
		}

		return $this;
	}

	/**
	 * Selects the export mode: an explicit page list takes precedence, then a
	 * file export, and otherwise everything is printed via printAllToOutput.
	 */
	private function exportRdfToOutputChannel() {

		$exportController = new ExportController( new RDFXMLSerializer() );

		// --page overrides --file, -d and -e.
		if ( $this->pages !== [] ) {
			return $exportController->printPages(
				$this->pages
			);
		}

		if ( $this->hasOption( 'file' ) ) {
			return $exportController->printAllToFile(
				$this->getOption( 'file' ),
				$this->restrictNamespaceTo,
				$this->delay,
				$this->delayeach
			);
		}

		$exportController->printAllToOutput(
			$this->restrictNamespaceTo,
			$this->delay,
			$this->delayeach
		);
	}

}

// Name the maintenance class for the MediaWiki runner included below.
$maintClass = DumpRdf::class;
require_once RUN_MAINTENANCE_IF_MAIN;