diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/SemanticMediaWiki/maintenance |
first commit
Diffstat (limited to 'www/wiki/extensions/SemanticMediaWiki/maintenance')
11 files changed, 2080 insertions, 0 deletions
diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/README.md b/www/wiki/extensions/SemanticMediaWiki/maintenance/README.md new file mode 100644 index 00000000..37f675da --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/README.md @@ -0,0 +1,106 @@ +## Semantic MediaWiki maintenance scripts + +Scripts can be run with a command line PHP call if your MediaWiki is +properly configured to run maintenance scripts. + +If you keep SMW in the standard directory `./extensions/SemanticMediaWiki` +below your MediaWiki installation, then you can run these scripts from +almost anywhere. + +Otherwise, it is required to set the environment variable `MW_INSTALL_PATH` +to the root of your MediaWiki installation first. This is also required if +you use a symbolic link from `./extensions/SemanticMediaWiki` to the actual +installation directory of Semantic MediaWiki. Setting environment variables +is different for different operating systems and shells, but can normally be +done from the command line right before the php call. On Bash (Linux), e.g. +one can use the following call to execute "setupStore.php" with a different +MediaWiki location: + + export MW_INSTALL_PATH="/path/to/mediawiki" && php setupStore.php + +In some setups that use a lot of shared code for many wikis, it might be +required to specify the location of "LocalSettings.php" explicitly, too: + +``` +export MW_INSTALL_PATH="/path/to/mediawiki" && php setupStore.php --conf=/path/to/mediawiki/LocalSettings.php +``` + +### dumpRDF.php + +Complete RDF export of existing triples. + +Usage: +- php dumpRDF.php +- [--categories|--classes|--concepts|--conf|--d|--dbpass|--dbuser|--e|--file|--globals|--help|--individuals|--memory-limit|--page|--profiler|--properties|--quiet|--server|--types|--wiki] + +### populateHashField.php + +Populate the `smw_hash` field for all entities that have a missing entry. 
+ +Usage: +- php populateHashField.php +- [--conf|--dbpass|--dbuser|--globals|--help|--memory-limit|--profiler|--quiet|--server|--wiki] + +### rebuildConceptCache.php + +Manages concept caches in Semantic MediaWiki. + +Usage: +- php rebuildConceptCache.php +- [--concept|--conf|--create|--dbpass|--dbuser|--debug|--delete|--e|--globals|--hard|--help|--memory-limit|--old|--profiler|--quiet|--report-runtime|--s|--server|--status|--update|--verbose|--wiki|--with-maintenance-log] + +### rebuildData.php + +Recreates all the semantic data in the database + +Usage: +- php rebuildData.php +- [--b|--categories|--conf|--d|--dbpass|--dbuser|--debug|--dispose-outdated|--e|--exception-log|--f|--force-update|--globals|--help|--ignore-exceptions|--memory-limit|--n|--no-cache|--p|--page|--profiler|--property-statistics|--query|--quiet|--redirects|--report-poolcache|--report-runtime|--revision-mode|--s|--server|--shallow-update|--skip-properties|--startidfile|--v|--wiki|--with-maintenance-log] + +### rebuildElasticIndex.php + +Rebuilds the Elasticsearch index. + +Usage: +- php rebuildElasticIndex.php +- [--conf|--dbpass|--dbuser|--debug|--delete-all|--e|--force-refresh|--globals|--help|--memory-limit|--page|--profiler|--quiet|--report-runtime|--run-fileindex|--s|--server|--skip-fileindex|--update-settings|--wiki] + +### rebuildFulltextSearchTable.php + +Rebuilds the fulltext search index. + +Usage: +- php rebuildFulltextSearchTable.php +- [--conf|--dbpass|--dbuser|--globals|--help|--memory-limit|--optimize|--profiler|--quick|--quiet|--report-runtime|--server|--v|--wiki|--with-maintenance-log] + +### rebuildPropertyStatistics.php + +Rebuilds the property usage statistics + +Usage: +- php rebuildPropertyStatistics.php +- [--conf|--dbpass|--dbuser|--globals|--help|--memory-limit|--profiler|--quiet|--server|--wiki|--with-maintenance-log] + +### removeDuplicateEntities.php + +Removes duplicate entities. 
+ +Usage: +- php removeDuplicateEntities.php +- [--conf|--dbpass|--dbuser|--globals|--help|--memory-limit|--profiler|--quiet|--s|--server|--wiki] + +### setupStore.php + +Sets up the storage backend. + +Usage: +- php setupStore.php +- [--backend|--conf|--dbpass|--dbuser|--delete|--globals|--help|--memory-limit|--nochecks|--profiler|--quiet|--server|--skip-import|--skip-optimize|--wiki] + +### updateEntityCollation.php + +Updates the `smw_sort` field. + +Usage: +- php updateEntityCollation.php +- [--conf|--dbpass|--dbuser|--globals|--help|--memory-limit|--profiler|--quiet|--s|--server|--wiki] diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php new file mode 100644 index 00000000..5a411057 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/dumpRDF.php @@ -0,0 +1,188 @@ +<?php + +namespace SMW\Maintenance; + +use SMWExportController as ExportController; +use SMWRDFXMLSerializer as RDFXMLSerializer; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * Usage: + * php dumpRDF.php [options...] + * + * --file (-o) <file> Export everything to given output file, stdout is used if omitted; + * file output is generally better and strongly recommended for large wikis + * --categories Export only categories + * --concepts Export only concepts + * --classes Export only concepts and categories + * --properties Export only properties + * --types Export only types + * --individuals Export only pages that are no categories, properties, or types + * --page <pagelist> Export only pages included in the <pagelist> with | being used as a separator. + * Example: --page "Page 1|Page 2", -e, -file, -d are ignored if --page is given. 
+ * -d <delay> Slows down the export in order to stress the server less, + * sleeping for <delay> milliseconds every now and then + * -e <each> After how many exported entities should the process take a nap? + * --server=<server> The protocol and server name to use as base URLs, e.g. + * https://en.wikipedia.org. This is sometimes necessary because + * server name detection may fail in command line scripts. + * + * @ingroup SMWMaintenance + * + * @license GNU GPL v2+ + * @since 2.0 + * + * @author Markus Krötzsch + * @author mwjames + */ +class DumpRdf extends \Maintenance { + + private $delay = 0; + private $delayeach = 0; + + /** + * @var boolean|array + */ + private $restrictNamespaceTo = false; + + /** + * @var array + */ + private $pages = []; + + /** + * @since 2.0 + */ + public function __construct() { + parent::__construct(); + + $this->addDescription( "\n" ."Complete RDF export of existing triples. \n" ); + $this->addDefaultParams(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 2.0 + */ + protected function addDefaultParams() { + + parent::addDefaultParams(); + + $this->addOption( 'd', '<delay> Wait for this many milliseconds after processing, useful for limiting server load.', false, true ); + $this->addOption( 'e', '<each> after how many exported entities should the process take a nap.', false, true ); + $this->addOption( 'file', '<file> output file.', false, true, 'o' ); + + $this->addOption( 'categories', 'Export only categories', false ); + $this->addOption( 'concepts', 'Export only concepts', false ); + $this->addOption( 'classes', 'Export only classes', false ); + $this->addOption( 'properties', 'Export only properties', false ); + $this->addOption( 'types', 'Export only types', false ); + $this->addOption( 'individuals', 'Export only individuals', false ); + + $this->addOption( 'page', 'Export only pages included in the <pagelist> with | being used as a separator. ' . 
+ 'Example: --page "Page 1|Page 2", -e, -file, -d are ignored if --page is given.', false, true ); + + $this->addOption( 'server', '<server> The protocol and server name to as base URLs, e.g. http://en.wikipedia.org. ' . + 'This is sometimes necessary because server name detection may fail in command line scripts.', false, true ); + + $this->addOption( 'quiet', 'Do not give any output', false, false, 'q' ); + } + + /** + * @see Maintenance::execute + * + * @since 2.0 + */ + public function execute() { + + if ( !defined( 'SMW_VERSION' ) ) { + $this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" ); + exit; + } + + $this->reportMessage( "\nWriting OWL/RDF dump to " . $this->getOption( 'file' ) . " ...\n" ); + $this->setParameters()->exportRdfToOutputChannel(); + + return true; + } + + /** + * @see Maintenance::reportMessage + * + * @since 2.0 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + + private function setParameters() { + + if ( $this->hasOption( 'd' ) ) { + $this->delay = intval( $this->getOption( 'd' ) ) * 1000; + } + + $this->delayeach = ( $this->delay === 0 ) ? 
0 : 1; + + if ( $this->hasOption( 'e' ) ) { + $this->delayeach = intval( $this->getOption( 'e' ) ); + } + + if ( $this->hasOption( 'categories' ) ) { + $this->restrictNamespaceTo = NS_CATEGORY; + } elseif ( $this->hasOption( 'concepts' ) ) { + $this->restrictNamespaceTo = SMW_NS_CONCEPT; + } elseif ( $this->hasOption( 'classes' ) ) { + $this->restrictNamespaceTo = [ NS_CATEGORY, SMW_NS_CONCEPT ]; + } elseif ( $this->hasOption( 'properties' ) ) { + $this->restrictNamespaceTo = SMW_NS_PROPERTY; + } elseif ( $this->hasOption( 'individuals' ) ) { + $this->restrictNamespaceTo = - 1; + } + + if ( $this->hasOption( 'page' ) ) { + $this->pages = explode( '|', $this->getOption( 'page' ) ); + } + + if ( $this->hasOption( 'server' ) ) { + $GLOBALS['wgServer'] = $this->getOption( 'server' ); + } + + return $this; + } + + private function exportRdfToOutputChannel() { + + $exportController = new ExportController( new RDFXMLSerializer() ); + + if ( $this->pages !== [] ) { + return $exportController->printPages( + $this->pages + ); + } + + if ( $this->hasOption( 'file' ) ) { + return $exportController->printAllToFile( + $this->getOption( 'file' ), + $this->restrictNamespaceTo, + $this->delay, + $this->delayeach + ); + } + + $exportController->printAllToOutput( + $this->restrictNamespaceTo, + $this->delay, + $this->delayeach + ); + } + +} + +$maintClass = 'SMW\Maintenance\DumpRdf'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/populateHashField.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/populateHashField.php new file mode 100644 index 00000000..e19d0e69 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/populateHashField.php @@ -0,0 +1,222 @@ +<?php + +namespace SMW\Maintenance; + +use Onoi\MessageReporter\MessageReporter; +use SMW\ApplicationFactory; +use SMW\SQLStore\SQLStore; +use SMW\SQLStore\Installer; +use SMW\Setup; +use SMW\Store; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false 
? getenv('MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * @license GNU GPL v2+ + * @since 3.1 + * + * @author mwjames + */ +class PopulateHashField extends \Maintenance { + + /** + * Threshold as to when the `populateHashField.php` should be used by an + * administrator instead. + * + * This postpones the execution to after `setupStore.php`/`update.php` in + * order to help minimize the time required for the initial setup/upgrade. + */ + const COUNT_SCRIPT_EXECUTION_THRESHOLD = 200000; + + /** + * @var Store + */ + private $store; + + /** + * @var MessageReporter + */ + private $messageReporter; + + /** + * @since 3.1 + */ + public function __construct() { + $this->mDescription = "Populate the 'smw_hash' field for all entities that have a missing entry."; + parent::__construct(); + } + + /** + * @since 3.1 + * + * @param boolean $incomplete + */ + public function setComplete( $incomplete ) { + + $this->reportMessage( + " ... writing the status to the setup information file ... 
\n" + ); + + Installer::setUpgradeFile( + $GLOBALS, + [ + Installer::POPULATE_HASH_FIELD_COMPLETE => $incomplete + ] + ); + } + + /** + * @since 3.1 + * + * @param Store $store + */ + public function setStore( Store $store ) { + $this->store = $store; + } + + /** + * @since 3.1 + * + * @param MessageReporter $messageReporter + */ + public function setMessageReporter( MessageReporter $messageReporter ) { + $this->messageReporter = $messageReporter; + } + + /** + * @since 3.1 + * + * @param string $message + */ + public function reportMessage( $message ) { + + if ( $this->messageReporter !== null ) { + return $this->messageReporter->reportMessage( $message ); + } + + $this->output( $message ); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have Semantic MediaWiki enabled in order to run the maintenance script!\n" ); + exit; + } + + $this->store = ApplicationFactory::getInstance()->getStore( + 'SMW\SQLStore\SQLStore' + ); + + $this->reportMessage( "\nChecking 'smw_hash' field consistency ...\n" ); + $this->populate(); + + $this->reportMessage( " ... done.\n" ); + + return true; + } + + /** + * @since 3.1 + * + * @return Iterator + */ + public function fetchRows() { + + $connection = $this->store->getConnection( 'mw.db' ); + + return $connection->select( + SQLStore::ID_TABLE, + [ + 'smw_id', + 'smw_title', + 'smw_namespace', + 'smw_iw', + 'smw_subobject' + ], + [ + 'smw_hash' => null, + 'smw_iw != ' . 
$connection->addQuotes( SMW_SQL3_SMWDELETEIW ) + ], + __METHOD__ + ); + } + + /** + * @since 3.1 + * + * @param Iterator $rows + */ + public function populate( \Iterator $rows = null ) { + + if ( $rows === null ) { + $rows = $this->fetchRows(); + } + + $connection = $this->store->getConnection( 'mw.db' ); + $idTable = $this->store->getObjectIds(); + + $count = 0; + $i = 0; + + if ( $rows !== null ) { + $count = $rows->numRows(); + } + + if ( $count == 0 ) { + $this->reportMessage( " ... all rows populated ...\n" ); + } else { + $this->reportMessage( " ... missing $count rows ...\n" ); + + foreach ( $rows as $row ) { + + $hash = $idTable->computeSha1( + [ + $row->smw_title, + (int)$row->smw_namespace, + $row->smw_iw, + $row->smw_subobject + ] + ); + + $this->reportMessage( + $this->progress( $row->smw_id, $i++, $count ) + ); + + $connection->update( + SQLStore::ID_TABLE, + [ + 'smw_hash' => $hash + ], + [ + 'smw_id' => $row->smw_id + ], + __METHOD__ + ); + } + } + + $this->reportMessage( "\n" ); + $this->setComplete( true ); + } + + /** + * @see Maintenance::addDefaultParams + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + } + + private function progress( $id, $i, $count ) { + return "\r". sprintf( "%-35s%s", " ... updating document no.", sprintf( "%s (%1.0f%%)", $id, round( ( $i / $count ) * 100 ) ) ); + } + +} + +$maintClass = 'SMW\Maintenance\PopulateHashField'; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildConceptCache.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildConceptCache.php new file mode 100644 index 00000000..86a77df5 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildConceptCache.php @@ -0,0 +1,188 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . 
'/maintenance/Maintenance.php'; + +/** + * Manage concept caches + * + * This script is used to manage concept caches for Semantic MediaWiki. Concepts + * are semantic queries stored on Concept: pages. The elements of concepts can be + * computed online, or they can come from a pre-computed cache. The wiki may even + * be configured to display certain concepts only if they are available cached. + * + * This script can create, delete and update these caches, or merely show their + * status. + * + * Usage: php rebuildConceptCache.php <action> [<select concepts>] [<options>] + * + * Actions: + * --help Show this message. + * --status Show the cache status of the selected concepts. + * --create Rebuild caches for the selected concepts. + * --delete Remove all caches for the selected concepts. + * + * If no further options are given, all concepts in the wiki are processed. + * + * Select concepts: + * --concept 'Concept name' Process only this one concept. + * --hard Process only concepts that are not allowed to be computed + * online according to the current wiki settings. + * --update Process only concepts that already have some cache, i.e. do + * not create any new caches. For the opposite (only concepts + * without caches), use --old with a very high number. + * --old <min> Process only concepts with caches older than <min> minutes + * or with no caches at all. + * -s <startid> Process only concepts with page id of at least <startid> + * -e <endid> Process only concepts with page id of at most <endid> + * + * Selection options can be combined to process only concepts that meet all the + * requirements at once. If --concept is given, then -s and -e are ignored. + * + * Options: + * --quiet Do not give any output. + * --verbose Give additional output. No effect if --quiet is given. + * + * Use option --help for usage details. + * + * Note: if SMW is not installed in its standard path under ./extensions + * then the MW_INSTALL_PATH environment variable must be set. 
+ * See README in the maintenance directory. + * + * @ingroup Maintenance + * + * @license GNU GPL v2+ + * @since 1.9.2 + * + * @author Markus Krötzsch + * @author mwjames + */ +class RebuildConceptCache extends \Maintenance { + + public function __construct() { + parent::__construct(); + + $this->addDescription( "\n" . + "This script is used to manage concept caches for Semantic MediaWiki. Concepts \n" . + "are semantic queries stored on Concept: pages. The elements of concepts can be \n" . + "computed online, or they can come from a pre-computed cache. The wiki may even \n" . + "be configured to display certain concepts only if they are available cached. \n" . + "\n" . "This script can create, delete and update these caches, or merely show their \n". + "status. " + ); + + $this->addDefaultParams(); + } + + /** + * @see Maintenance::addDefaultParams + */ + protected function addDefaultParams() { + + parent::addDefaultParams(); + + // Actions + $this->addOption( 'status', 'Show the cache status of the selected concepts' ); + $this->addOption( 'create', 'Rebuild caches for the selected concepts.' ); + $this->addOption( 'delete', 'Remove all caches for the selected concepts.' ); + + // Options + $this->addOption( 'concept', '"Concept name" Process only this one concept.', false, true ); + $this->addOption( 'hard', 'Process only concepts that are not allowed to be computed online according to the current wiki settings.' ); + + $this->addOption( 'update', 'Process only concepts that already have some cache, i.e. do not create any new caches. ' . + 'For the opposite (only concepts without caches), use --old with a very high number.' 
); + + $this->addOption( 'old', '<min> Process only concepts with caches older than <min> minutes or with no caches at all.', false, true ); + $this->addOption( 's', '<startid> Process only concepts with page id of at least <startid>', false, true ); + $this->addOption( 'e', '<endid> Process only concepts with page id of at most <endid>', false, true ); + + $this->addOption( 'with-maintenance-log', 'Add log entry to `Special:Log` about the maintenance run.', false ); + $this->addOption( 'report-runtime', 'Report execution time and memory usage', false ); + $this->addOption( 'debug', 'Sets global variables to support debug ouput while running the script', false ); + $this->addOption( 'quiet', 'Do not give any output', false ); + $this->addOption( 'verbose', 'Give additional output. No effect if --quiet is given.', false ); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $applicationFactory = ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $maintenanceHelper = $maintenanceFactory->newMaintenanceHelper(); + $maintenanceHelper->initRuntimeValues(); + + if ( $this->hasOption( 'debug' ) ) { + $maintenanceHelper->setGlobalToValue( 'wgShowExceptionDetails', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowSQLErrors', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowDBErrorBacktrace', true ); + } + + $conceptCacheRebuilder = $maintenanceFactory->newConceptCacheRebuilder( + $applicationFactory->getStore(), + [ $this, 'reportMessage' ] + ); + + $conceptCacheRebuilder->setParameters( $this->mOptions ); + + $result = $this->checkForRebuildState( + 
$conceptCacheRebuilder->rebuild() + ); + + if ( $result && $this->hasOption( 'report-runtime' ) ) { + $this->reportMessage( "\n" . "Runtime report ..." . "\n" ); + $this->reportMessage( $maintenanceHelper->getFormattedRuntimeValues( ' ...' ) . "\n" ); + } + + if ( $this->hasOption( 'with-maintenance-log' ) ) { + $maintenanceLogger = $maintenanceFactory->newMaintenanceLogger( 'RebuildConceptCacheLogger' ); + $maintenanceLogger->log( $maintenanceHelper->getFormattedRuntimeValues() ); + } + + $maintenanceHelper->reset(); + + return $result; + } + + /** + * @since 1.9.2 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + + private function checkForRebuildState( $rebuildResult ) { + + if ( !$rebuildResult ) { + $this->reportMessage( $this->mDescription . "\n\n" . 'Use option --help for usage details.' . "\n" ); + return false; + } + + return true; + } + +} + +$maintClass = 'SMW\Maintenance\RebuildConceptCache'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildData.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildData.php new file mode 100644 index 00000000..e73931de --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildData.php @@ -0,0 +1,225 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\StoreFactory; +use SMW\Store; +use SMW\Setup; +use SMW\Options; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * Recreates all the semantic data in the database, by cycling through all + * the pages that might have semantic data, and calling functions that + * re-save semantic data for each one. + * + * Note: if SMW is not installed in its standard path under ./extensions + * then the MW_INSTALL_PATH environment variable must be set. 
+ * See README in the maintenance directory. + * + * Usage: + * php rebuildData.php [options...] + * + * -d <delay> Wait for this many milliseconds after processing an article, useful for limiting server load. + * -s <startid> Start refreshing at given article ID, useful for partial refreshing + * -e <endid> Stop refreshing at given article ID, useful for partial refreshing + * -n <numids> Stop refreshing after processing a given number of IDs, useful for partial refreshing + * --startidfile <startidfile> Read <startid> from a file instead of the arguments and write the next id + * to the file when finished. Useful for continual partial refreshing from cron. + * -b <backend> Execute the operation for the storage backend of the given name + * (default is to use the current backend) + * -v Be verbose about the progress. + * -c Will refresh only category pages (and other explicitly named namespaces) + * -p Will refresh only property pages (and other explicitly named namespaces) + * --page=<pagelist> Will refresh only the pages of the given names, with | used as a separator. + * Example: --page="Page 1|Page 2" refreshes Page 1 and Page 2 + * Options -s, -e, -n, --startidfile, -c, -p, -t are ignored if --page is given. + * --query=<query> Will refresh only pages returned by a given query. + * Example: --query='[[Category:SomeCategory]]' + * -f Fully delete all content instead of just refreshing relevant entries. This will also + * rebuild the whole storage structure. May leave the wiki temporarily incomplete. + * --server=<server> The protocol and server name to use as base URLs, e.g. + * http://en.wikipedia.org. This is sometimes necessary because + * server name detection may fail in command line scripts. + * + * @author Yaron Koren + * @author Markus Krötzsch + */ +class RebuildData extends \Maintenance { + + public function __construct() { + parent::__construct(); + + $this->addDescription( "\n" . 
+ "Recreates all the semantic data in the database, by cycling through all \n" . + "the pages that might have semantic data, and calling functions that \n" . + "re-save semantic data for each one. \n" + ); + + $this->addDefaultParams(); + } + + /** + * @see Maintenance::addDefaultParams + */ + protected function addDefaultParams() { + + parent::addDefaultParams(); + + $this->addOption( 'd', '<delay> Wait for this many milliseconds after processing an article, useful for limiting server load.', false, true ); + $this->addOption( 's', '<startid> Start refreshing at given article ID, useful for partial refreshing.', false, true ); + $this->addOption( 'e', '<endid> Stop refreshing at given article ID, useful for partial refreshing.', false, true ); + $this->addOption( 'n', '<numids> Stop refreshing after processing a given number of IDs, useful for partial refreshing.', false, true ); + + $this->addOption( 'startidfile', '<startidfile> Read <startid> from a file instead of the arguments and write the next id to the file when finished. ' . + 'Useful for continual partial refreshing from cron.', false, true ); + + $this->addOption( 'b', '<backend> Execute the operation for the storage backend of the given name (default is to use the current backend).', false, true ); + + $this->addOption( 'f', 'Fully delete all content instead of just refreshing relevant entries. This will also rebuild the whole storage structure. ' . 
+ 'May leave the wiki temporarily incomplete.', false ); + + $this->addOption( 'v', 'Be verbose about the progress', false ); + $this->addOption( 'p', 'Only refresh property pages (and other explicitly named namespaces)', false ); + $this->addOption( 'categories', 'Only refresh category pages (and other explicitly named namespaces)', false, false, 'c' ); + $this->addOption( 'redirects', 'Only refresh redirect pages', false ); + $this->addOption( 'dispose-outdated', 'Only Remove outdated marked entities (including pending references).', false ); + + $this->addOption( 'skip-properties', 'Skip the default properties rebuild (only recommended when successive build steps are used)', false ); + $this->addOption( 'shallow-update', 'Skip processing of entities that compare to the last known revision date', false ); + $this->addOption( 'property-statistics', 'Execute `rebuildPropertyStatistics` after the `rebuildData` run has finished.', false ); + + $this->addOption( 'force-update', 'Force an update even when an associated revision is known', false ); + $this->addOption( 'revision-mode', 'Skip entities where its associated revision matches the latests referenced revision of an associated page', false ); + + $this->addOption( 'ignore-exceptions', 'Ignore exceptions and log exception to a file', false ); + $this->addOption( 'exception-log', 'Exception log file location (e.g. /tmp/logs/)', false, true ); + $this->addOption( 'with-maintenance-log', 'Add log entry to `Special:Log` about the maintenance run.', false ); + + $this->addOption( 'page', '<pagelist> Will refresh only the pages of the given names, with | used as a separator. ' . + 'Example: --page "Page 1|Page 2" refreshes Page 1 and Page 2 Options -s, -e, -n, ' . + '--startidfile, -c, -p, -t are ignored if --page is given.', false, true ); + + $this->addOption( 'server', '<server> The protocol and server name to as base URLs, e.g. http://en.wikipedia.org. ' . 
+ 'This is sometimes necessary because server name detection may fail in command line scripts.', false, true ); + + $this->addOption( 'query', "<query> Will refresh only pages returned by a given query. Example: --query='[[Category:SomeCategory]]'", false, true ); + + $this->addOption( 'report-runtime', 'Report execution time and memory usage', false ); + $this->addOption( 'report-poolcache', 'Report internal poolcache memory usage', false ); + $this->addOption( 'no-cache', 'Sets the `wgMainCacheType` to none while running the script', false ); + $this->addOption( 'debug', 'Sets global variables to support debug ouput while running the script', false ); + $this->addOption( 'quiet', 'Do not give any output', false ); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $maintenanceFactory = ApplicationFactory::getInstance()->newMaintenanceFactory(); + + $maintenanceHelper = $maintenanceFactory->newMaintenanceHelper(); + $maintenanceHelper->initRuntimeValues(); + + if ( $this->hasOption( 'no-cache' ) ) { + $maintenanceHelper->setGlobalToValue( 'wgMainCacheType', CACHE_NONE ); + $maintenanceHelper->setGlobalToValue( 'smwgEntityLookupCacheType', CACHE_NONE ); + $maintenanceHelper->setGlobalToValue( 'smwgQueryResultCacheType', CACHE_NONE ); + } + + if ( $this->hasOption( 'debug' ) ) { + $maintenanceHelper->setGlobalToValue( 'wgShowExceptionDetails', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowSQLErrors', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowDBErrorBacktrace', true ); + } else { + $maintenanceHelper->setGlobalToValue( 'wgDebugLogFile', '' ); + $maintenanceHelper->setGlobalToValue( 
'wgDebugLogGroups', [] ); + } + + $store = StoreFactory::getStore( $this->hasOption( 'b' ) ? $this->getOption( 'b' ) : null ); + $store->setOption( Store::OPT_CREATE_UPDATE_JOB, false ); + + $dataRebuilder = $maintenanceFactory->newDataRebuilder( + $store, + [ $this, 'reportMessage' ] + ); + + $dataRebuilder->setOptions( + new Options( $this->mOptions ) + ); + + $result = $this->checkForRebuildState( + $dataRebuilder->rebuild() + ); + + if ( $result && $this->hasOption( 'property-statistics' ) ) { + $rebuildPropertyStatistics = $maintenanceFactory->newRebuildPropertyStatistics(); + $rebuildPropertyStatistics->execute(); + } + + if ( $result && $this->hasOption( 'report-runtime' ) ) { + $this->reportMessage( "\n" . "Runtime report ..." . "\n" ); + $this->reportMessage( $maintenanceHelper->getFormattedRuntimeValues( ' ...' ) . "\n" ); + } + + if ( $this->hasOption( 'with-maintenance-log' ) ) { + $maintenanceLogger = $maintenanceFactory->newMaintenanceLogger( 'RebuildDataLogger' ); + $runtimeValues = $maintenanceHelper->getRuntimeValues(); + + $log = [ + 'Memory used: ' . $runtimeValues['memory-used'], + 'Time used: ' . $runtimeValues['humanreadable-time'], + 'Rebuild count: ' . $dataRebuilder->getRebuildCount(), + 'Exception count: ' . $dataRebuilder->getExceptionCount() + ]; + + $maintenanceLogger->log( implode( ', ', $log ) ); + } + + $maintenanceHelper->reset(); + + if ( $this->hasOption( 'report-poolcache' ) ) { + $this->reportMessage( "\n" . ApplicationFactory::getInstance()->getInMemoryPoolCache()->getStats( \SMW\Utils\StatsFormatter::FORMAT_JSON ) . "\n" ); + } + + return $result; + } + + /** + * @since 1.9.2 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + + private function checkForRebuildState( $rebuildResult ) { + + if ( !$rebuildResult ) { + $this->reportMessage( $this->mDescription . "\n\n" . 'Use option --help for usage details.' . 
"\n" ); + return false; + } + + return true; + } + +} + +$maintClass = 'SMW\Maintenance\RebuildData'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildElasticIndex.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildElasticIndex.php new file mode 100644 index 00000000..fe48d734 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildElasticIndex.php @@ -0,0 +1,366 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\SQLStore\SQLStore; +use SMW\Elastic\ElasticFactory; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv('MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * @license GNU GPL v2+ + * @since 3.0 + * + * @author mwjames + */ +class RebuildElasticIndex extends \Maintenance { + + /** + * @var Store + */ + private $store; + + /** + * @var Rebuilder + */ + private $rebuilder; + + /** + * @var JobQueue + */ + private $jobQueue; + + /** + * @see Maintenance::__construct + * + * @since 3.0 + */ + public function __construct() { + $this->mDescription = 'Rebuild the Elasticsearch index from property tables (content is not explicitly parsed!)'; + $this->addOption( 's', 'Start with a selected document no.', false, true ); + $this->addOption( 'e', 'End with a selected document no. 
(requires a start ID)', false, true ); + $this->addOption( 'page', 'Set of pages (Foo|Bar|...)', false, true ); + $this->addOption( 'update-settings', 'Update settings and mappings for listed indices', false, false ); + $this->addOption( 'force-refresh', 'Forces a refresh of listed indices', false, false ); + $this->addOption( 'delete-all', 'Delete listed indices without rebuilding the data', false, false ); + $this->addOption( 'skip-fileindex', 'Skipping any file ingest actions', false, false ); + $this->addOption( 'run-fileindex', 'Only run file ingest actions', false, false ); + + // showAbort() tests for `quick` to skip the five second countdown; the + // option has to be registered here, otherwise `--quick` is not a known + // option of this script. + $this->addOption( 'quick', 'Suppress abort operation', false, false ); + + $this->addOption( 'debug', 'Sets global variables to support debug ouput while running the script', false ); + $this->addOption( 'report-runtime', 'Report execution time and memory usage', false ); + + parent::__construct(); + } + + /** + * Forwards a message to the maintenance output channel. + * + * @since 3.0 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + // If available, set a callback to listen to a possible user termination + // and try to recover the index settings. 
+ if ( function_exists( 'pcntl_signal_dispatch' ) ) { + pcntl_signal( SIGTERM, [ $this, 'handleTermSignal' ], false ); + pcntl_signal_dispatch(); + } + + $applicationFactory = ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $maintenanceHelper = $maintenanceFactory->newMaintenanceHelper(); + $maintenanceHelper->initRuntimeValues(); + + if ( $this->hasOption( 'debug' ) ) { + $maintenanceHelper->setGlobalToValue( 'wgShowExceptionDetails', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowSQLErrors', true ); + $maintenanceHelper->setGlobalToValue( 'wgShowDBErrorBacktrace', true ); + } else { + $maintenanceHelper->setGlobalToValue( 'wgDebugLogFile', '' ); + $maintenanceHelper->setGlobalToValue( 'wgDebugLogGroups', [] ); + } + + $this->jobQueue = $applicationFactory->getJobQueue(); + $this->store = $applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ); + $elasticFactory = $applicationFactory->create( 'ElasticFactory' ); + + $this->rebuilder = $elasticFactory->newRebuilder( + $this->store + ); + + $this->rebuilder->setMessageReporter( + $maintenanceFactory->newMessageReporter( [ $this, 'reportMessage' ] ) + ); + + if ( !$this->rebuilder->ping() ) { + return $this->reportMessage( + "\n" . 'Elasticsearch endpoint(s) are not available!' . "\n" + ); + } + + $this->reportMessage( + "\nThe script rebuilds the index from available property tables. Any\n" . + "change of the index rules (e.g. altered stopwords, new stemmer etc.)\n" . + "or a newly added (or altered) table requires to run this script again\n" . + "to ensure that the index complies with the rules set forth by the SQL\n" . + "back-end or the Elasticsearch field mapping.\n" + ); + + if ( $this->otherActivities() ) { + return true; + } + + $this->showAbort(); + + $this->reportMessage( + "\nIf for some reason the rebuild process is aborted, please make sure\n" . + "to run `--update-settings` so that default settings can be recovered\n". 
+ "and set to a normal working mode.\n" + ); + + $this->rebuild(); + + if ( $this->hasOption( 'report-runtime' ) ) { + $this->reportMessage( "\n" . $maintenanceHelper->getFormattedRuntimeValues() . "\n" ); + } + + $maintenanceHelper->reset(); + + return true; + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 3.0 + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + } + + protected function handleTermSignal( $signal ) { + + $this->reportMessage( "\n" . ' ... rebuild was terminated, start recovery process ...' ); + $this->rebuilder->setDefaults(); + $this->rebuilder->refresh(); + $this->reportMessage( "\n" . ' ... done.' . "\n" ); + + pcntl_signal( SIGTERM, SIG_DFL ); + exit( 1 ); + } + + private function otherActivities() { + + if ( $this->hasOption( 'update-settings' ) ) { + $this->reportMessage( + "\n" . 'Settings and mappings ...' + ); + + $message = $this->rebuilder->setDefaults() ? ' ... done.' : ' ... failed (due to missing index).'; + $this->reportMessage( "\n$message\n" ); + + return true; + } + + if ( $this->hasOption( 'force-refresh' ) ) { + $this->reportMessage( + "\n" . 'Forcing refresh of known indices ...' + ); + + $message = $this->rebuilder->refresh() ? ' ... done.' : ' ... failed (due to missing index).'; + $this->reportMessage( "\n$message\n" ); + + return true; + } + + if ( $this->hasOption( 'delete-all' ) ) { + $this->reportMessage( + "\n" . 'Deleting all indices ...' + ); + + $this->rebuilder->deleteAndSetupIndices(); + $this->reportMessage( "\n ... done.\n" ); + + return true; + } + + return false; + } + + private function showAbort() { + + $showAbort = !$this->hasOption( 'quick' ) && !$this->hasOption( 's' ) && !$this->hasOption( 'page' ) && !$this->hasOption( 'run-fileindex' ); + + if ( !$showAbort ) { + return; + } + + $this->reportMessage( + "\nThe rebuild will use a rollover approach which means that while the\n" . + "new index is created, the old index is still available and allows\n" . 
+ "queries to work even though the rebuild is ongoing. Once completed,\n" . + "a \"rollover\" will switch the indices at which point the old indices\n" . + "are being removed.\n" + ); + + $this->reportMessage( + "\nIt should be noted that the replication is paused for the duration\n" . + "of the rebuild to allow changes to pages and annotations to be\n" . + "processed after the re-index has been completed therefore running\n". + "the job scheduler is obligatory.\n" + ); + + $this->reportMessage( "\n" . 'Abort the rebuild with control-c in the next five seconds ... ' ); + swfCountDown( 5 ); + } + + private function rebuild() { + + $this->reportMessage( "\nRebuilding indices ..." ); + $isSelective = $this->hasOption( 's' ) || $this->hasOption( 'page' ); + + if ( !$this->hasOption( 's' ) && !$this->hasOption( 'page' ) && !$this->hasOption( 'run-fileindex' ) ) { + $this->reportMessage( "\n" . ' ... creating required indices and aliases ...' ); + $this->rebuilder->createIndices(); + } + + $this->rebuilder->prepare(); + + list( $res, $last ) = $this->rebuilder->select( + $this->store, + $this->select_conditions() + ); + + if ( $isSelective ) { + $last = $res->numRows(); + } + + if ( $res->numRows() > 0 ) { + $this->reportMessage( "\n" ); + } else { + $this->reportMessage( "\n" . ' ... no documents to process ...' ); + } + + $this->rebuilder->set( 'skip-fileindex', $this->getOption( 'skip-fileindex' ) ); + $i = 0; + + foreach ( $res as $row ) { + $i++; + $this->rebuild_row( $i, $row, $last, $isSelective ); + } + + $this->rebuilder->setDefaults(); + $this->rebuilder->refresh(); + + $this->reportMessage( "\n" . ' ... done.' . "\n" ); + + if ( ( $count = $this->jobQueue->getQueueSize( 'smw.elasticIndexerRecovery' ) ) > 0 ) { + $this->reportMessage( "\n" . "Job queue ..." ); + $this->reportMessage( "\n" . " ... smw.elasticIndexerRecovery has $count unprocessed jobs ..." ); + $this->reportMessage( "\n" . ' ... done.' . 
"\n" ); + } + } + + private function rebuild_row( $i, $row, $last, $isSelective ) { + + $i = $isSelective ? $i : $row->smw_id; + $key = $isSelective ? '(count)' : 'no.'; + + $this->reportMessage( + "\r". sprintf( "%-50s%s", " ... updating document $key", sprintf( "%4.0f%% (%s/%s)", ( $i / $last ) * 100, $i, $last ) ) + ); + + if ( $row->smw_iw === SMW_SQL3_SMWDELETEIW || $row->smw_iw === SMW_SQL3_SMWREDIIW ) { + return $this->rebuilder->delete( $row->smw_id ); + } + + $dataItem = $this->store->getObjectIds()->getDataItemById( + $row->smw_id + ); + + if ( $dataItem === null ) { + return; + } + + $this->rebuilder->rebuild( + $row->smw_id, + $this->store->getSemanticData( $dataItem ) + ); + } + + private function select_conditions() { + + $connection = $this->store->getConnection( 'mw.db' ); + + $conditions = []; + $conditions[] = "smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWIW_OUTDATED ); + + if ( $this->hasOption( 's' ) ) { + $conditions[] = 'smw_id >= ' . $connection->addQuotes( $this->getOption( 's' ) ); + + if ( $this->hasOption( 'e' ) ) { + $conditions[] = 'smw_id <= ' . $connection->addQuotes( $this->getOption( 'e' ) ); + } + } + + if ( $this->hasOption( 'run-fileindex' ) ) { + $conditions[] = 'smw_namespace=' . $connection->addQuotes( NS_FILE ); + } + + if ( $this->hasOption( 'page' ) ) { + $pages = explode( '|', $this->getOption( 'page' ) ); + + foreach ( $pages as $page ) { + $title = \Title::newFromText( $page ); + + if ( $title === null ) { + continue; + } + + $op = '='; + $text = $title->getDBKey(); + + // Match something like --page="Lorem*" + if ( strpos( $title->getDBKey(), '*' ) !== false ) { + $op = ' LIKE '; + $text = str_replace( '*', '%', $text ); + } + + $cond = [ + "smw_title$op" . $connection->addQuotes( $text ), + 'smw_namespace=' . 
$connection->addQuotes( $title->getNamespace() ) + ]; + + $conditions[] = implode( ' AND ', $cond ); + } + } + + return $conditions; + } + +} + +$maintClass = 'SMW\Maintenance\RebuildElasticIndex'; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildFulltextSearchTable.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildFulltextSearchTable.php new file mode 100644 index 00000000..c600d257 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildFulltextSearchTable.php @@ -0,0 +1,177 @@ +<?php + +namespace SMW\Maintenance; + +use Onoi\MessageReporter\MessageReporterFactory; +use SMW\SQLStore\QueryEngine\FulltextSearchTableFactory; +use SMW\ApplicationFactory; +use SMWDataItem as DataItem; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * @license GNU GPL v2+ + * @since 2.5 + * + * @author mwjames + */ +class RebuildFulltextSearchTable extends \Maintenance { + + public function __construct() { + $this->mDescription = 'Rebuild the fulltext search index (only works with SQLStore)'; + $this->addOption( 'report-runtime', 'Report execution time and memory usage', false ); + $this->addOption( 'with-maintenance-log', 'Add log entry to `Special:Log` about the maintenance run.', false ); + $this->addOption( 'optimize', 'Run possible table optimization (support depends on the SQL back-end) ', false ); + $this->addOption( 'v', 'Show additional (verbose) information about the progress', false ); + $this->addOption( 'quick', 'Suppress abort operation', false ); + + parent::__construct(); + } + + + /** + * @see Maintenance::addDefaultParams + * + * @since 2.5 + */ + protected function addDefaultParams() { + + parent::addDefaultParams(); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + 
$this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $applicationFactory = ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $fulltextSearchTableFactory = new FulltextSearchTableFactory(); + + // Only the SQLStore is supported + $searchTableRebuilder = $fulltextSearchTableFactory->newSearchTableRebuilder( + $applicationFactory->getStore( '\SMW\SQLStore\SQLStore' ) + ); + + $textSanitizer = $fulltextSearchTableFactory->newTextSanitizer(); + + $searchTableRebuilder->reportVerbose( + $this->hasOption( 'v' ) + ); + + $searchTableRebuilder->requestOptimization( + $this->hasOption( 'optimize' ) + ); + + $this->reportMessage( + "\nThe script rebuilds the search index from property tables that\n" . + "support a fulltext search. Any change of the index rules (altered\n". + "stopwords, new stemmer etc.) and/or a newly added or altered table\n". + "requires to run this script again to ensure that the index complies\n". + "with the rules set forth by the SQL back-end or Sanitizer.\n" + ); + + $this->reportConfiguration( + $searchTableRebuilder, + $textSanitizer + ); + + if ( !$this->hasOption( 'quick' ) ) { + $this->reportMessage( "\n" . 'Abort the rebuild with control-c in the next five seconds ... ' ); + swfCountDown( 5 ); + } + + $maintenanceHelper = $maintenanceFactory->newMaintenanceHelper(); + $maintenanceHelper->initRuntimeValues(); + + // Need to instantiate an extra object here since we cannot make this class itself + // into a MessageReporter since the maintenance script does not load the interface in time. 
+ $reporter = MessageReporterFactory::getInstance()->newObservableMessageReporter(); + $reporter->registerReporterCallback( [ $this, 'reportMessage' ] ); + + $searchTableRebuilder->setMessageReporter( $reporter ); + $result = $searchTableRebuilder->rebuild(); + + if ( $result && $this->hasOption( 'report-runtime' ) ) { + $this->reportMessage( "\n" . "Runtime report ..." . "\n" ); + $this->reportMessage( $maintenanceHelper->getFormattedRuntimeValues( ' ...' ) . "\n" ); + } + + if ( $this->hasOption( 'with-maintenance-log' ) ) { + $maintenanceLogger = $maintenanceFactory->newMaintenanceLogger( 'RebuildFulltextSearchTableLogger' ); + $maintenanceLogger->log( $maintenanceHelper->getFormattedRuntimeValues() ); + } + + $maintenanceHelper->reset(); + return $result; + } + + private function reportConfiguration( $searchTableRebuilder, $textSanitizer ) { + + $this->reportMessage( "\nConfiguration ..." ); + + foreach ( $textSanitizer->getVersions() as $key => $value ) { + $this->reportMessage( "\n" . sprintf( "%-36s%s", " ... {$key}", $value ) ); + } + + $searchTable = $searchTableRebuilder->getSearchTable(); + $indexableDataTypes = []; + + $dataTypes = [ + DataItem::TYPE_BLOB => 'BLOB', + DataItem::TYPE_URI => 'URI', + DataItem::TYPE_WIKIPAGE => 'WIKIPAGE' + ]; + + foreach ( $dataTypes as $key => $value ) { + if ( $searchTable->isValidByType( $key ) ) { + $indexableDataTypes[] = $value; + } + } + + $this->reportMessage( "\n" . sprintf( "%-36s%s", " ... DataTypes (indexable)", implode( ', ', $indexableDataTypes ) ) ); + $this->reportMessage( "\n\nExempted properties (not indexable) ..." ); + + $exemptionList = ''; + + foreach ( $searchTable->getPropertyExemptionList() as $prop ) { + $exemptionList .= ( $exemptionList === '' ? '' : ', ' ) . $prop; + + if ( strlen( $exemptionList ) > 50 ) { + $this->reportMessage( "\n ... " . $exemptionList ); + $exemptionList = ''; + } + } + + $this->reportMessage( "\n ... " . $exemptionList . 
"\n" ); + } + + /** + * @see Maintenance::reportMessage + * + * @since 2.5 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\RebuildFulltextSearchTable'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildPropertyStatistics.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildPropertyStatistics.php new file mode 100644 index 00000000..ad64f6cd --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/rebuildPropertyStatistics.php @@ -0,0 +1,86 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * Maintenance script for rebuilding the property usage statistics. + * + * @license GNU GPL v2+ + * @since 1.9 + * + * @author Jeroen De Dauw < jeroendedauw@gmail.com > + */ +class RebuildPropertyStatistics extends \Maintenance { + + public function __construct() { + $this->mDescription = 'Rebuild the property usage statistics (only works with SQLStore3 for now)'; + $this->addOption( 'with-maintenance-log', 'Add log entry to `Special:Log` about the maintenance run.', false ); + + parent::__construct(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 1.9 + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->output( "You need to have SMW enabled in order to use this maintenance script!\n\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $applicationFactory = 
ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $maintenanceHelper = $maintenanceFactory->newMaintenanceHelper(); + $maintenanceHelper->initRuntimeValues(); + + $statisticsRebuilder = $maintenanceFactory->newPropertyStatisticsRebuilder( + $applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ), + [ $this, 'reportMessage' ] + ); + + $statisticsRebuilder->rebuild(); + + if ( $this->hasOption( 'with-maintenance-log' ) ) { + $maintenanceLogger = $maintenanceFactory->newMaintenanceLogger( 'RebuildPropertyStatisticsLogger' ); + $maintenanceLogger->log( $maintenanceHelper->getFormattedRuntimeValues() ); + } + } + + /** + * @see Maintenance::reportMessage + * + * @since 1.9 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\RebuildPropertyStatistics'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/removeDuplicateEntities.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/removeDuplicateEntities.php new file mode 100644 index 00000000..a058f167 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/removeDuplicateEntities.php @@ -0,0 +1,90 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( +'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . 
'/maintenance/Maintenance.php'; + +/** + * @license GNU GPL v2+ + * @since 3.0 + * + * @author mwjames + */ +class RemoveDuplicateEntities extends \Maintenance { + + /** + * @since 3.0 + */ + public function __construct() { + $this->mDescription = 'Remove duplicate entities without active references.'; + $this->addOption( 's', 'ID starting point', false, true ); + + parent::__construct(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 3.0 + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + if ( !Setup::isValid() ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $this->reportMessage( + "\nThe script will only dispose of those duplicate entities that have no active\n" . + "references. The log section 'untouched' contains IDs that have not been\n" . + "removed and the user is asked to verify the content and manually remove\n". 
+ "those listed entities.\n\n" + ); + + $applicationFactory = ApplicationFactory::getInstance(); + $maintenanceFactory = $applicationFactory->newMaintenanceFactory(); + + $duplicateEntitiesDisposer = $maintenanceFactory->newDuplicateEntitiesDisposer( + $applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ), + [ $this, 'reportMessage' ] + ); + + $duplicateEntityRecords = $duplicateEntitiesDisposer->findDuplicates(); + $duplicateEntitiesDisposer->verifyAndDispose( $duplicateEntityRecords ); + + return true; + } + + /** + * @see Maintenance::reportMessage + * + * @since 1.9 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\RemoveDuplicateEntities'; +require_once( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/setupStore.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/setupStore.php new file mode 100644 index 00000000..59d6fe98 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/setupStore.php @@ -0,0 +1,265 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\Store; +use SMW\StoreFactory; +use Onoi\MessageReporter\MessageReporterFactory; +use Onoi\MessageReporter\MessageReporter; +use SMW\ApplicationFactory; +use SMW\SQLStore\Installer; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . '/maintenance/Maintenance.php'; + +/** + * Sets up the storage backend currently selected in LocalSettings.php + * (or the default MySQL store if no other store was selected). This + * is equivalent to clicking the respective button on the special page + * Special:SMWAdmin. However, the latter may timeout if the setup involves + * migrating a lot of existing data. + * + * Note: If SMW is not installed in its standard path under ./extensions + * then the MW_INSTALL_PATH environment variable must be set. 
+ * See README in the maintenance directory. + * + * Usage: + * php setupStore.php [options...] + * + * -password Password for user account + * NOTE: specifying user credentials in a command line call will usually store them + * within the shell history file. For security, provide credentials in AdminSettings.php + * instead and ensure that your text editor does not create world-readable backup copies + * when modifying this file. + * + * --delete Delete all SMW data, uninstall the selected storage backend. This is useful + * when moving to a new storage engine, and in the rare case of uninstalling + * SMW. Deleted data can be recreated using this script (setup) followed by the + * use of the rebuildData.php script which may take some time. + * + * --backend The backend to use, e.g. SMWSQLStore3. + * + * --skip-optimize Skips the table optimization process. + * + * --skip-import Skips the import process. + * + * --nochecks When specified, no prompts are provided. Deletion will thus happen + * without the need to provide any confirmation. + * + * @author Markus Krötzsch + * @author Jeroen De Dauw < jeroendedauw@gmail.com > + */ +class SetupStore extends \Maintenance { + + /** + * Name of the store class configured in LocalSettings.php. Stored to + * be able to tell if the selected store is the current default or not. 
+ * + * @var string + */ + protected $originalStore; + + /** + * @var MessageReporter + */ + protected $messageReporter; + + /** + * @since 2.0 + */ + public function __construct() { + parent::__construct(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 2.0 + */ + protected function addDefaultParams() { + parent::addDefaultParams(); + + $this->mDescription = 'Sets up the SMW storage backend currently selected in LocalSettings.php.'; + + $this->addOption( 'backend', 'Execute the operation for the storage backend of the given name.', false, true, 'b' ); + + $this->addOption( 'delete', 'Delete all SMW data, uninstall the selected storage backend.' ); + $this->addOption( 'skip-optimize', 'Skipping the table optimization process (not recommended).', false ); + $this->addOption( 'skip-import', 'Skipping the import process.', false ); + + $this->addOption( + 'nochecks', + 'Run the script without providing prompts. Deletion will thus happen without the need to provide any confirmation.' 
+ ); + } + + /** + * @since 3.0 + * + * @param MessageReporter $messageReporter + */ + public function setMessageReporter( MessageReporter $messageReporter ) { + $this->messageReporter = $messageReporter; + } + + /** + * @see Maintenance::getDbType + * + * @since 3.0 + */ + public function getDbType() { + return \Maintenance::DB_ADMIN; + } + + /** + * @since 3.0 + */ + public function getConnection() { + return $this->getDB( DB_MASTER ); + } + + /** + * @see Maintenance::execute + * + * @since 2.0 + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + StoreFactory::clear(); + + $this->loadGlobalFunctions(); + $store = $this->getStore(); + + $connectionManager = ApplicationFactory::getInstance()->getConnectionManager(); + + // #2963 Use the Maintenance DB connection instead and the DB_ADMIN request + // to allow to use the admin user/pass, if set + $connectionManager->registerCallbackConnection( DB_MASTER, [ $this, 'getConnection' ] ); + + $store->setConnectionManager( + $connectionManager + ); + + $store->setMessageReporter( + $this->getMessageReporter() + ); + + $store->setOption( Installer::OPT_TABLE_OPTIMIZE, !$this->hasOption( 'skip-optimize' ) ); + $store->setOption( Installer::OPT_IMPORT, !$this->hasOption( 'skip-import' ) ); + $store->setOption( Installer::OPT_SUPPLEMENT_JOBS, true ); + + if ( $this->hasOption( 'delete' ) ) { + $this->dropStore( $store ); + } else { + $store->setup(); + } + + // Avoid holding a reference + StoreFactory::clear(); + } + + protected function getMessageReporter() { + + $messageReporterFactory = MessageReporterFactory::getInstance(); + + if ( $this->messageReporter === null && $this->getOption( 'quiet' ) ) { + $this->messageReporter = $messageReporterFactory->newNullMessageReporter(); + } elseif( $this->messageReporter === null ) { + $this->messageReporter = 
$messageReporterFactory->newObservableMessageReporter(); + $this->messageReporter->registerReporterCallback( [ $this, 'reportMessage' ] ); + } + + return $this->messageReporter; + } + + protected function loadGlobalFunctions() { + global $smwgIP; + + if ( !isset( $smwgIP ) ) { + $smwgIP = dirname( __FILE__ ) . '/../'; + } + + require_once ( $smwgIP . 'src/GlobalFunctions.php' ); + } + + protected function getStore() { + global $smwgDefaultStore; + + $storeClass = $this->getOption( 'backend', $smwgDefaultStore ); + $this->originalStore = $smwgDefaultStore; + + if ( class_exists( $storeClass ) ) { + $smwgDefaultStore = $storeClass; + } else { + $this->error( "\nError: There is no backend class \"$storeClass\". Aborting.", 1 ); + } + + return StoreFactory::getStore( $storeClass ); + } + + protected function dropStore( Store $store ) { + $storeName = get_class( $store ); + + $verification = $this->promptDeletionVerification( $storeName ); + + if ( !$verification ) { + return; + } + + $store->drop( !$this->isQuiet() ); + + // be sure to have some buffer, otherwise some PHPs complain + while ( ob_get_level() > 0 ) { + ob_end_flush(); + } + + $this->output( "\nYou can recreate them with this script followed by the use\n"); + $this->output( "of the rebuildData.php script to rebuild their contents.\n"); + } + + /** + * @param string $storeName + * + * @return boolean + */ + protected function promptDeletionVerification( $storeName ) { + $this->output( "You are about to delete all data stored in the SMW backend $storeName.\n" ); + + if ( $storeName === $this->originalStore ) { + $this->output( "This backend is CURRENTLY IN USE. Deleting it is likely to BREAK YOUR WIKI.\n" ); + } else { + $this->output( "This backend is not currently in use. 
Deleting it should not cause any problems.\n" ); + } + $this->output( "To undo this operation later on, a complete refresh of the data will be needed.\n" ); + + if ( !$this->hasOption( 'nochecks' ) ) { + print ( "If you are sure you want to proceed, type DELETE.\n" ); + + if ( $this->readconsole() !== 'DELETE' ) { + print ( "Aborting.\n\n" ); + return false; + } + } + return true; + } + + /** + * @since 3.0 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\SetupStore'; +require_once ( RUN_MAINTENANCE_IF_MAIN ); diff --git a/www/wiki/extensions/SemanticMediaWiki/maintenance/updateEntityCollation.php b/www/wiki/extensions/SemanticMediaWiki/maintenance/updateEntityCollation.php new file mode 100644 index 00000000..2328cf91 --- /dev/null +++ b/www/wiki/extensions/SemanticMediaWiki/maintenance/updateEntityCollation.php @@ -0,0 +1,167 @@ +<?php + +namespace SMW\Maintenance; + +use SMW\ApplicationFactory; +use SMW\SQLStore\SQLStore; +use SMW\SQLStore\TableFieldUpdater; +use SMW\DIWikiPage; +use SMW\DIProperty; +use SMWDataItem as DataItem; +use SMW\Exception\PredefinedPropertyLabelMismatchException; +use SMW\Setup; + +$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv('MW_INSTALL_PATH' ) : __DIR__ . '/../../..'; + +require_once $basePath . 
'/maintenance/Maintenance.php'; + +/** + * @license GNU GPL v2+ + * @since 3.0 + * + * @author mwjames + */ +class UpdateEntityCollation extends \Maintenance { + + public function __construct() { + $this->mDescription = 'Update the smw_sort field (relying on the $smwgEntityCollation setting)'; + $this->addOption( 's', 'ID starting point', false, true ); + + parent::__construct(); + } + + /** + * @see Maintenance::addDefaultParams + * + * @since 3.0 + */ + protected function addDefaultParams() { + + parent::addDefaultParams(); + } + + /** + * @see Maintenance::execute + */ + public function execute() { + + if ( !Setup::isEnabled() ) { + $this->reportMessage( "\nYou need to have SMW enabled in order to run the maintenance script!\n" ); + exit; + } + + if ( !Setup::isValid( true ) ) { + $this->reportMessage( "\nYou need to run `update.php` or `setupStore.php` first before continuing\nwith any maintenance tasks!\n" ); + exit; + } + + $applicationFactory = ApplicationFactory::getInstance(); + $store = $applicationFactory->getStore( 'SMW\SQLStore\SQLStore' ); + + $connection = $store->getConnection( 'mw.db' ); + $tableFieldUpdater = new TableFieldUpdater( $store ); + + $condition = " smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWIW_OUTDATED ) . " AND smw_iw!=" . $connection->addQuotes( SMW_SQL3_SMWDELETEIW ); + $i = 1; + + if ( $this->hasOption( 's' ) ) { + $i = $this->getOption( 's' ); + $condition .= ' AND smw_id > ' . $connection->addQuotes( $this->getOption( 's' ) ); + } + + $res = $connection->select( + SQLStore::ID_TABLE, + [ + 'smw_id', + 'smw_title', + 'smw_sortkey' + ], + $condition, + __METHOD__ + ); + + $expected = $res->numRows() + $i; + + if ( $applicationFactory->getSettings()->get( 'smwgEntityCollation' ) !== $GLOBALS['wgCategoryCollation'] ) { + $this->reportMessage( + "\n" . 'The setting of $smwgEntityCollation and $wgCategoryCollation are different' . "\n" . + 'and may result in an inconsitent sorting display for entities.' . 
"\n" + ); + + $this->reportMessage( "\n" . '$smwgEntityCollation: ' . $applicationFactory->getSettings()->get( 'smwgEntityCollation' ) ); + $this->reportMessage( "\n" . '$wgCategoryCollation: ' . $GLOBALS['wgCategoryCollation'] . "\n" ); + } + + $this->reportMessage( + "\nPerforming the update ..." + ); + + $this->reportMessage( "\n ... selecting $expected rows ..." ); + $this->reportMessage( "\n" ); + + $this->doUpdate( $store, $tableFieldUpdater, $res, $i, $expected ); + $this->reportMessage( "\n" ); + } + + /** + * Walks the selected ID table rows and writes a sort field for each row + * that has a non-empty title, printing a progress line per row. + * + * @param mixed $store store used to resolve the ID and to read `_SKEY` values + * @param mixed $tableFieldUpdater receives the `updateSortField` calls + * @param mixed $res iterable rows exposing smw_id, smw_title, smw_sortkey + * @param int|string $i progress counter start value + * @param int $expected progress total + */ + private function doUpdate( $store, $tableFieldUpdater, $res, $i, $expected ) { + $property = new DIProperty( '_SKEY' ); + + foreach ( $res as $row ) { + + if ( $row->smw_title === '' ) { + continue; + } + + $i++; + + $dataItem = $store->getObjectIds()->getDataItemById( $row->smw_id ); + $pv = $store->getPropertyValues( $dataItem, $property ); + + $search = $this->getSortKey( $row, $pv ); + + // Without any usable key, fall back to the title with spaces + if ( $search === '' && $row->smw_title !== '' ) { + $search = str_replace( '_', ' ', $row->smw_title ); + } + + $this->reportMessage( + "\r". sprintf( "%-35s%s", " ... updating document no.", sprintf( "%4.0f%% (%s/%s)", ( $i / $expected ) * 100, $i, $expected ) ) + ); + + $tableFieldUpdater->updateSortField( $row->smw_id, $search ); + } + } + + /** + * Selects the sort key for a row: the last `_SKEY` value when one exists, + * otherwise the stored smw_sortkey, except for titles starting with `_` + * where the canonical label of the matching property is used (falling + * back to smw_sortkey when the property cannot be constructed). + * + * @param object $row row exposing smw_title and smw_sortkey + * @param array $pv `_SKEY` values found for the row + * + * @return string + */ + private function getSortKey( $row, $pv ) { + + if ( $pv !== [] ) { + return end( $pv )->getString(); + } + + // Square brackets are required for string offsets; the curly brace + // form is deprecated as of PHP 7.4 and no longer parses in PHP 8.0. + if ( $row->smw_title[0] !== '_' ) { + return $row->smw_sortkey; + } + + try { + $property = new DIProperty( $row->smw_title ); + } catch ( PredefinedPropertyLabelMismatchException $e ) { + return $row->smw_sortkey; + } + + return $property->getCanonicalLabel(); + } + + /** + * @see Maintenance::reportMessage + * + * @since 1.9 + * + * @param string $message + */ + public function reportMessage( $message ) { + $this->output( $message ); + } + +} + +$maintClass = 'SMW\Maintenance\UpdateEntityCollation'; +require_once( RUN_MAINTENANCE_IF_MAIN ); |