diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/externalstore |
first commit
Diffstat (limited to 'www/wiki/includes/externalstore')
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStore.php | 254 | ||||
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStoreDB.php | 335 | ||||
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStoreFactory.php | 42 | ||||
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStoreHttp.php | 41 | ||||
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStoreMedium.php | 91 | ||||
-rw-r--r-- | www/wiki/includes/externalstore/ExternalStoreMwstore.php | 105 |
6 files changed, 868 insertions, 0 deletions
diff --git a/www/wiki/includes/externalstore/ExternalStore.php b/www/wiki/includes/externalstore/ExternalStore.php new file mode 100644 index 00000000..de7d1a4c --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStore.php @@ -0,0 +1,254 @@ +<?php +/** + * @defgroup ExternalStorage ExternalStorage + */ + +use MediaWiki\MediaWikiServices; + +/** + * Interface for data storage in external repositories. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Constructor class for key/value blob data kept in external repositories. + * + * Objects in external stores are defined by a special URL. The URL is of + * the form "<store protocol>://<location>/<object name>". The protocol is used + * to determine what ExternalStoreMedium class is used. The location identifies + * particular storage instances or database clusters for store class to use. + * + * When an object is inserted into a store, the calling code uses a partial URL of + * the form "<store protocol>://<location>" and receives the full object URL on success. + * This is useful since object names can be sequential IDs, UUIDs, or hashes. + * Callers are not responsible for unique name generation. 
+ * + * External repositories might be populated by maintenance/async + * scripts, thus partial moving of data may be possible, as well + * as the possibility to have any storage format (i.e. for archives). + * + * @ingroup ExternalStorage + */ +class ExternalStore { + /** + * Get an external store object of the given type, with the given parameters + * + * @param string $proto Type of external storage, should be a value in $wgExternalStores + * @param array $params Associative array of ExternalStoreMedium parameters + * @return ExternalStoreMedium|bool The store class or false on error + */ + public static function getStoreObject( $proto, array $params = [] ) { + return MediaWikiServices::getInstance() + ->getExternalStoreFactory() + ->getStoreObject( $proto, $params ); + } + + /** + * Fetch data from given URL + * + * @param string $url The URL of the text to get + * @param array $params Associative array of ExternalStoreMedium parameters + * @return string|bool The text stored or false on error + * @throws MWException + */ + public static function fetchFromURL( $url, array $params = [] ) { + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) != 2 ) { + return false; // invalid URL + } + + list( $proto, $path ) = $parts; + if ( $path == '' ) { // bad URL + return false; + } + + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + return false; + } + + return $store->fetchFromURL( $url ); + } + + /** + * Fetch data from multiple URLs with a minimum of round trips + * + * @param array $urls The URLs of the text to get + * @return array Map from url to its data. Data is either string when found + * or false on failure. 
+ * @throws MWException + */ + public static function batchFetchFromURLs( array $urls ) { + $batches = []; + foreach ( $urls as $url ) { + $scheme = parse_url( $url, PHP_URL_SCHEME ); + if ( $scheme ) { + $batches[$scheme][] = $url; + } + } + $retval = []; + foreach ( $batches as $proto => $batchedUrls ) { + $store = self::getStoreObject( $proto ); + if ( $store === false ) { + continue; + } + $retval += $store->batchFetchFromURLs( $batchedUrls ); + } + // invalid, not found, db dead, etc. + $missing = array_diff( $urls, array_keys( $retval ) ); + if ( $missing ) { + foreach ( $missing as $url ) { + $retval[$url] = false; + } + } + + return $retval; + } + + /** + * Store a data item to an external store, identified by a partial URL + * The protocol part is used to identify the class, the rest is passed to the + * class itself as a parameter. + * + * @param string $url A partial external store URL ("<store type>://<location>") + * @param string $data + * @param array $params Associative array of ExternalStoreMedium parameters + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insert( $url, $data, array $params = [] ) { + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) != 2 ) { + return false; // invalid URL + } + + list( $proto, $path ) = $parts; + if ( $path == '' ) { // bad URL + return false; + } + + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + return false; + } else { + return $store->store( $path, $data ); + } + } + + /** + * Like insert() above, but does more of the work for us. + * This function does not need a url param, it builds it by + * itself. It also fails-over to the next possible clusters + * provided by $wgDefaultExternalStore. 
+ * + * @param string $data + * @param array $params Map of ExternalStoreMedium::__construct context parameters + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insertToDefault( $data, array $params = [] ) { + global $wgDefaultExternalStore; + + return self::insertWithFallback( (array)$wgDefaultExternalStore, $data, $params ); + } + + /** + * Like insert() above, but does more of the work for us. + * This function does not need a url param, it builds it by + * itself. It also fails-over to the next possible clusters + * as provided in the first parameter. + * + * @param array $tryStores Refer to $wgDefaultExternalStore + * @param string $data + * @param array $params Map of ExternalStoreMedium::__construct context parameters + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insertWithFallback( array $tryStores, $data, array $params = [] ) { + $error = false; + while ( count( $tryStores ) > 0 ) { + $index = mt_rand( 0, count( $tryStores ) - 1 ); + $storeUrl = $tryStores[$index]; + wfDebug( __METHOD__ . ": trying $storeUrl\n" ); + list( $proto, $path ) = explode( '://', $storeUrl, 2 ); + $store = self::getStoreObject( $proto, $params ); + if ( $store === false ) { + throw new MWException( "Invalid external storage protocol - $storeUrl" ); + } + + try { + if ( $store->isReadOnly( $path ) ) { + $msg = 'read only'; + } else { + $url = $store->store( $path, $data ); + if ( strlen( $url ) ) { + return $url; // a store accepted the write; done! + } + $msg = 'operation failed'; + } + } catch ( Exception $error ) { + $msg = 'caught exception'; + } + + unset( $tryStores[$index] ); // Don't try this one again! 
+ $tryStores = array_values( $tryStores ); // Must have consecutive keys + wfDebugLog( 'ExternalStorage', + "Unable to store text to external storage $storeUrl ($msg)" ); + } + // All stores failed + if ( $error ) { + throw $error; // rethrow the last error + } else { + throw new MWException( "Unable to store text to external storage" ); + } + } + + /** + * @return bool Whether all the default insertion stores are marked as read-only + * @since 1.31 + */ + public static function defaultStoresAreReadOnly() { + global $wgDefaultExternalStore; + + $tryStores = (array)$wgDefaultExternalStore; + if ( !$tryStores ) { + return false; // no stores exist which can be "read only" + } + + foreach ( $tryStores as $storeUrl ) { + list( $proto, $path ) = explode( '://', $storeUrl, 2 ); + $store = self::getStoreObject( $proto, [] ); + if ( $store === false || !$store->isReadOnly( $path ) ) { + return false; // store is unavailable (getStoreObject() failed) or not read-only + } + } + + return true; // all stores are read-only + } + + /** + * @param string $data + * @param string $wiki + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + public static function insertToForeignDefault( $data, $wiki ) { + return self::insertToDefault( $data, [ 'wiki' => $wiki ] ); + } +} diff --git a/www/wiki/includes/externalstore/ExternalStoreDB.php b/www/wiki/includes/externalstore/ExternalStoreDB.php new file mode 100644 index 00000000..22a2d2bc --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStoreDB.php @@ -0,0 +1,335 @@ +<?php +/** + * External storage in SQL database. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use MediaWiki\MediaWikiServices; +use Wikimedia\Rdbms\ILoadBalancer; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\DBConnRef; +use Wikimedia\Rdbms\MaintainableDBConnRef; +use Wikimedia\Rdbms\DatabaseDomain; + +/** + * DB accessible external objects. + * + * In this system, each store "location" maps to a database "cluster". + * The clusters must be defined in the normal LBFactory configuration. + * + * @ingroup ExternalStorage + */ +class ExternalStoreDB extends ExternalStoreMedium { + /** + * The provided URL is in the form of DB://cluster/id + * or DB://cluster/id/itemid for concatenated storage. + * + * @param string $url + * @return string|bool False if missing + * @see ExternalStoreMedium::fetchFromURL() + */ + public function fetchFromURL( $url ) { + list( $cluster, $id, $itemID ) = $this->parseURL( $url ); + $ret = $this->fetchBlob( $cluster, $id, $itemID ); + + if ( $itemID !== false && $ret !== false ) { + return $ret->getItem( $itemID ); + } + + return $ret; + } + + /** + * Fetch data from given external store URLs. + * The provided URLs are in the form of DB://cluster/id + * or DB://cluster/id/itemid for concatenated storage. + * + * @param array $urls An array of external store URLs + * @return array A map from url to stored content. Failed results + * are not represented. 
+ */ + public function batchFetchFromURLs( array $urls ) { + $batched = $inverseUrlMap = []; + foreach ( $urls as $url ) { + list( $cluster, $id, $itemID ) = $this->parseURL( $url ); + $batched[$cluster][$id][] = $itemID; + // false $itemID gets cast to int, but should be ok + // since we do === from the $itemID in $batched + $inverseUrlMap[$cluster][$id][$itemID] = $url; + } + $ret = []; + foreach ( $batched as $cluster => $batchByCluster ) { + $res = $this->batchFetchBlobs( $cluster, $batchByCluster ); + /** @var HistoryBlob $blob */ + foreach ( $res as $id => $blob ) { + foreach ( $batchByCluster[$id] as $itemID ) { + $url = $inverseUrlMap[$cluster][$id][$itemID]; + if ( $itemID === false ) { + $ret[$url] = $blob; + } else { + $ret[$url] = $blob->getItem( $itemID ); + } + } + } + } + + return $ret; + } + + public function store( $location, $data ) { + $dbw = $this->getMaster( $location ); + $dbw->insert( $this->getTable( $dbw ), + [ 'blob_text' => $data ], + __METHOD__ ); + $id = $dbw->insertId(); + if ( !$id ) { + throw new MWException( __METHOD__ . ': no insert ID' ); + } + + return "DB://$location/$id"; + } + + public function isReadOnly( $location ) { + return ( $this->getLoadBalancer( $location )->getReadOnlyReason() !== false ); + } + + /** + * Get a LoadBalancer for the specified cluster + * + * @param string $cluster Cluster name + * @return ILoadBalancer + */ + private function getLoadBalancer( $cluster ) { + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + return $lbFactory->getExternalLB( $cluster ); + } + + /** + * Get a replica DB connection for the specified cluster + * + * @param string $cluster Cluster name + * @return DBConnRef + */ + public function getSlave( $cluster ) { + global $wgDefaultExternalStore; + + $lb = $this->getLoadBalancer( $cluster ); + $domainId = $this->getDomainId( $lb->getServerInfo( $lb->getWriterIndex() ) ); + + if ( !in_array( "DB://" . 
$cluster, (array)$wgDefaultExternalStore ) ) { + wfDebug( "read only external store\n" ); + $lb->allowLagged( true ); + } else { + wfDebug( "writable external store\n" ); + } + + $db = $lb->getConnectionRef( DB_REPLICA, [], $domainId ); + $db->clearFlag( DBO_TRX ); // sanity + + return $db; + } + + /** + * Get a master database connection for the specified cluster + * + * @param string $cluster Cluster name + * @return MaintainableDBConnRef + */ + public function getMaster( $cluster ) { + $lb = $this->getLoadBalancer( $cluster ); + $domainId = $this->getDomainId( $lb->getServerInfo( $lb->getWriterIndex() ) ); + + $db = $lb->getMaintenanceConnectionRef( DB_MASTER, [], $domainId ); + $db->clearFlag( DBO_TRX ); // sanity + + return $db; + } + + /** + * @param array $server Master DB server configuration array for LoadBalancer + * @return string|bool Database domain ID or false + */ + private function getDomainId( array $server ) { + if ( isset( $this->params['wiki'] ) ) { + return $this->params['wiki']; // explicit domain + } + + if ( isset( $server['dbname'] ) ) { + // T200471: for b/c, treat any "dbname" field as forcing which database to use. + // MediaWiki/LoadBalancer previously did not enforce any concept of a local DB + // domain, but rather assumed that the LB server configuration matched $wgDBname. + // This check is useful when the external storage DB for this cluster does not use + // the same name as the corresponding "main" DB(s) for wikis. + $domain = new DatabaseDomain( + $server['dbname'], + $server['schema'] ?? null, + $server['tablePrefix'] ?? 
'' + ); + + return $domain->getId(); + } + + return false; // local LB domain + } + + /** + * Get the 'blobs' table name for this database + * + * @param IDatabase $db + * @return string Table name ('blobs' by default) + */ + public function getTable( $db ) { + $table = $db->getLBInfo( 'blobs table' ); + if ( is_null( $table ) ) { + $table = 'blobs'; + } + + return $table; + } + + /** + * Fetch a blob item out of the database; a cache of the last-loaded + * blob will be kept so that multiple loads out of a multi-item blob + * can avoid redundant database access and decompression. + * @param string $cluster + * @param string $id + * @param string $itemID + * @return HistoryBlob|bool Returns false if missing + */ + private function fetchBlob( $cluster, $id, $itemID ) { + /** + * One-step cache variable to hold base blobs; operations that + * pull multiple revisions may often pull multiple times from + * the same blob. By keeping the last-used one open, we avoid + * redundant unserialization and decompression overhead. + */ + static $externalBlobCache = []; + + $cacheID = ( $itemID === false ) ? 
"$cluster/$id" : "$cluster/$id/"; + if ( isset( $externalBlobCache[$cacheID] ) ) { + wfDebugLog( 'ExternalStoreDB-cache', + "ExternalStoreDB::fetchBlob cache hit on $cacheID" ); + + return $externalBlobCache[$cacheID]; + } + + wfDebugLog( 'ExternalStoreDB-cache', + "ExternalStoreDB::fetchBlob cache miss on $cacheID" ); + + $dbr = $this->getSlave( $cluster ); + $ret = $dbr->selectField( $this->getTable( $dbr ), + 'blob_text', [ 'blob_id' => $id ], __METHOD__ ); + if ( $ret === false ) { + wfDebugLog( 'ExternalStoreDB', + "ExternalStoreDB::fetchBlob master fallback on $cacheID" ); + // Try the master + $dbw = $this->getMaster( $cluster ); + $ret = $dbw->selectField( $this->getTable( $dbw ), + 'blob_text', [ 'blob_id' => $id ], __METHOD__ ); + if ( $ret === false ) { + wfDebugLog( 'ExternalStoreDB', + "ExternalStoreDB::fetchBlob master failed to find $cacheID" ); + } + } + if ( $itemID !== false && $ret !== false ) { + // Unserialise object; caller extracts item + $ret = unserialize( $ret ); + } + + $externalBlobCache = [ $cacheID => $ret ]; + + return $ret; + } + + /** + * Fetch multiple blob items out of the database + * + * @param string $cluster A cluster name valid for use with LBFactory + * @param array $ids A map from the blob_id's to look for to the requested itemIDs in the blobs + * @return array A map from the blob_id's requested to their content. + * Unlocated ids are not represented + */ + private function batchFetchBlobs( $cluster, array $ids ) { + $dbr = $this->getSlave( $cluster ); + $res = $dbr->select( $this->getTable( $dbr ), + [ 'blob_id', 'blob_text' ], [ 'blob_id' => array_keys( $ids ) ], __METHOD__ ); + $ret = []; + if ( $res !== false ) { + $this->mergeBatchResult( $ret, $ids, $res ); + } + if ( $ids ) { + wfDebugLog( __CLASS__, __METHOD__ . + " master fallback on '$cluster' for: " . 
+ implode( ',', array_keys( $ids ) ) ); + // Try the master; resolve the table name on the master handle, as fetchBlob() does + $dbw = $this->getMaster( $cluster ); + $res = $dbw->select( $this->getTable( $dbw ), + [ 'blob_id', 'blob_text' ], + [ 'blob_id' => array_keys( $ids ) ], + __METHOD__ ); + if ( $res === false ) { + wfDebugLog( __CLASS__, __METHOD__ . " master failed on '$cluster'" ); + } else { + $this->mergeBatchResult( $ret, $ids, $res ); + } + } + if ( $ids ) { + wfDebugLog( __CLASS__, __METHOD__ . + " master on '$cluster' failed locating items: " . + implode( ',', array_keys( $ids ) ) ); + } + + return $ret; + } + + /** + * Helper function for self::batchFetchBlobs for merging master/replica DB results + * @param array &$ret Current self::batchFetchBlobs return value + * @param array &$ids Map from blob_id to requested itemIDs + * @param mixed $res DB result from Database::select + */ + private function mergeBatchResult( array &$ret, array &$ids, $res ) { + foreach ( $res as $row ) { + $id = $row->blob_id; + $itemIDs = $ids[$id]; + unset( $ids[$id] ); // to track if everything is found + if ( count( $itemIDs ) === 1 && reset( $itemIDs ) === false ) { + // single result stored per blob + $ret[$id] = $row->blob_text; + } else { + // multi result stored per blob + $ret[$id] = unserialize( $row->blob_text ); + } + } + } + + /** + * @param string $url + * @return array + */ + protected function parseURL( $url ) { + $path = explode( '/', $url ); + + return [ + $path[2], // cluster + $path[3], // id + isset( $path[4] ) ? 
$path[4] : false // itemID + ]; + } +} diff --git a/www/wiki/includes/externalstore/ExternalStoreFactory.php b/www/wiki/includes/externalstore/ExternalStoreFactory.php new file mode 100644 index 00000000..940fb2e2 --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStoreFactory.php @@ -0,0 +1,42 @@ +<?php +/** + * @defgroup ExternalStorage ExternalStorage + */ + +/** + * @ingroup ExternalStorage + */ +class ExternalStoreFactory { + + /** + * @var array + */ + private $externalStores; + + /** + * @param array $externalStores See $wgExternalStores + */ + public function __construct( array $externalStores ) { + $this->externalStores = array_map( 'strtolower', $externalStores ); + } + + /** + * Get an external store object of the given type, with the given parameters + * + * @param string $proto Type of external storage, should be a value in $wgExternalStores + * @param array $params Associative array of ExternalStoreMedium parameters + * @return ExternalStoreMedium|bool The store class or false on error + */ + public function getStoreObject( $proto, array $params = [] ) { + if ( !$this->externalStores || !in_array( strtolower( $proto ), $this->externalStores ) ) { + // Protocol not enabled + return false; + } + + $class = 'ExternalStore' . ucfirst( $proto ); + + // Any custom modules should be added to $wgAutoLoadClasses for on-demand loading + return class_exists( $class ) ? new $class( $params ) : false; + } + +} diff --git a/www/wiki/includes/externalstore/ExternalStoreHttp.php b/www/wiki/includes/externalstore/ExternalStoreHttp.php new file mode 100644 index 00000000..879686f7 --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStoreHttp.php @@ -0,0 +1,41 @@ +<?php +/** + * External storage using HTTP requests. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Example class for HTTP accessible external objects. + * Only supports reading, not storing. + * + * @ingroup ExternalStorage + */ +class ExternalStoreHttp extends ExternalStoreMedium { + public function fetchFromURL( $url ) { + return Http::get( $url, [], __METHOD__ ); + } + + public function store( $location, $data ) { + throw new MWException( "ExternalStoreHttp is read-only and does not support store()." ); + } + + public function isReadOnly( $location ) { + return true; + } +} diff --git a/www/wiki/includes/externalstore/ExternalStoreMedium.php b/www/wiki/includes/externalstore/ExternalStoreMedium.php new file mode 100644 index 00000000..da7752b7 --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStoreMedium.php @@ -0,0 +1,91 @@ +<?php +/** + * External storage in some particular medium. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup ExternalStorage + */ + +/** + * Accessible external objects in a particular storage medium + * + * @ingroup ExternalStorage + * @since 1.21 + */ +abstract class ExternalStoreMedium { + /** @var array */ + protected $params = []; + + /** + * @param array $params Usage context options: + * - wiki: the domain ID of the wiki this is being used for [optional] + */ + public function __construct( array $params = [] ) { + $this->params = $params; + } + + /** + * Fetch data from given external store URL + * + * @param string $url An external store URL + * @return string|bool The text stored or false on error + * @throws MWException + */ + abstract public function fetchFromURL( $url ); + + /** + * Fetch data from given external store URLs. + * + * @param array $urls A list of external store URLs + * @return array Map from the url to the text stored. Unfound data is not represented + */ + public function batchFetchFromURLs( array $urls ) { + $retval = []; + foreach ( $urls as $url ) { + $data = $this->fetchFromURL( $url ); + // Don't return when false to allow for simpler implementations. 
+ // errored urls are handled in ExternalStore::batchFetchFromURLs + if ( $data !== false ) { + $retval[$url] = $data; + } + } + + return $retval; + } + + /** + * Insert a data item into a given location + * + * @param string $location The location name + * @param string $data The data item + * @return string|bool The URL of the stored data item, or false on error + * @throws MWException + */ + abstract public function store( $location, $data ); + + /** + * Check if a given location is read-only + * + * @param string $location The location name + * @return bool Whether this location is read-only + * @since 1.31 + */ + public function isReadOnly( $location ) { + return false; + } +} diff --git a/www/wiki/includes/externalstore/ExternalStoreMwstore.php b/www/wiki/includes/externalstore/ExternalStoreMwstore.php new file mode 100644 index 00000000..5d7155e8 --- /dev/null +++ b/www/wiki/includes/externalstore/ExternalStoreMwstore.php @@ -0,0 +1,105 @@ +<?php +/** + * External storage in a file backend. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * File backend accessible external objects. + * + * In this system, each store "location" maps to the name of a file backend. 
+ * The file backends must be defined in $wgFileBackends and must be global + * and fully qualified with a global "wikiId" prefix in the configuration. + * + * @ingroup ExternalStorage + * @since 1.21 + */ +class ExternalStoreMwstore extends ExternalStoreMedium { + /** + * The URL returned is of the form mwstore://backend/container/wiki/id + * + * @see ExternalStoreMedium::fetchFromURL() + * @param string $url + * @return string|bool + */ + public function fetchFromURL( $url ) { + $be = FileBackendGroup::singleton()->backendFromPath( $url ); + if ( $be instanceof FileBackend ) { + // We don't need "latest" since objects are immutable and + // backends should at least have "read-after-create" consistency. + return $be->getFileContents( [ 'src' => $url ] ); + } + + return false; + } + + /** + * Fetch data from given external store URLs. + * The URL returned is of the form mwstore://backend/container/wiki/id + * + * @param array $urls An array of external store URLs + * @return array A map from url to stored content. Failed results are not represented. 
+ */ + public function batchFetchFromURLs( array $urls ) { + $pathsByBackend = []; + foreach ( $urls as $url ) { + $be = FileBackendGroup::singleton()->backendFromPath( $url ); + if ( $be instanceof FileBackend ) { + $pathsByBackend[$be->getName()][] = $url; + } + } + $blobs = []; + foreach ( $pathsByBackend as $backendName => $paths ) { + $be = FileBackendGroup::singleton()->get( $backendName ); + $blobs = $blobs + $be->getFileContentsMulti( [ 'srcs' => $paths ] ); + } + + return $blobs; + } + + public function store( $backend, $data ) { + $be = FileBackendGroup::singleton()->get( $backend ); + if ( $be instanceof FileBackend ) { + // Get three random base 36 characters to act as shard directories + $rand = Wikimedia\base_convert( mt_rand( 0, 46655 ), 10, 36, 3 ); + // Make sure ID is roughly lexicographically increasing for performance + $id = str_pad( UIDGenerator::newTimestampedUID128( 32 ), 26, '0', STR_PAD_LEFT ); + // Segregate items by wiki ID for the sake of bookkeeping + $wiki = isset( $this->params['wiki'] ) ? $this->params['wiki'] : wfWikiID(); + + $url = $be->getContainerStoragePath( 'data' ) . '/' . rawurlencode( $wiki ); + $url .= ( $be instanceof FSFileBackend ) + ? "/{$rand[0]}/{$rand[1]}/{$rand[2]}/{$id}" // keep directories small + : "/{$rand[0]}/{$rand[1]}/{$id}"; // container sharding is only 2-levels + + $be->prepare( [ 'dir' => dirname( $url ), 'noAccess' => 1, 'noListing' => 1 ] ); + if ( $be->create( [ 'dst' => $url, 'content' => $data ] )->isOK() ) { + return $url; + } + } + + return false; + } + + public function isReadOnly( $backend ) { + $be = FileBackendGroup::singleton()->get( $backend ); + + return $be ? $be->isReadOnly() : false; + } +} |