summaryrefslogtreecommitdiff
path: root/www/wiki/includes/externalstore
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/externalstore
first commit
Diffstat (limited to 'www/wiki/includes/externalstore')
-rw-r--r--www/wiki/includes/externalstore/ExternalStore.php254
-rw-r--r--www/wiki/includes/externalstore/ExternalStoreDB.php335
-rw-r--r--www/wiki/includes/externalstore/ExternalStoreFactory.php42
-rw-r--r--www/wiki/includes/externalstore/ExternalStoreHttp.php41
-rw-r--r--www/wiki/includes/externalstore/ExternalStoreMedium.php91
-rw-r--r--www/wiki/includes/externalstore/ExternalStoreMwstore.php105
6 files changed, 868 insertions, 0 deletions
diff --git a/www/wiki/includes/externalstore/ExternalStore.php b/www/wiki/includes/externalstore/ExternalStore.php
new file mode 100644
index 00000000..de7d1a4c
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStore.php
@@ -0,0 +1,254 @@
+<?php
+/**
+ * @defgroup ExternalStorage ExternalStorage
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Interface for data storage in external repositories.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Constructor class for key/value blob data kept in external repositories.
+ *
+ * Objects in external stores are defined by a special URL. The URL is of
+ * the form "<store protocol>://<location>/<object name>". The protocol is used
+ * to determine what ExternalStoreMedium class is used. The location identifies
+ * particular storage instances or database clusters for store class to use.
+ *
+ * When an object is inserted into a store, the calling code uses a partial URL of
+ * the form "<store protocol>://<location>" and receives the full object URL on success.
+ * This is useful since object names can be sequential IDs, UUIDs, or hashes.
+ * Callers are not responsible for unique name generation.
+ *
+ * External repositories might be populated by maintenance/async
+ * scripts, thus partial moving of data may be possible, as well
+ * as the possibility to have any storage format (i.e. for archives).
+ *
+ * @ingroup ExternalStorage
+ */
+class ExternalStore {
+	/**
+	 * Get an external store object of the given type, with the given parameters
+	 *
+	 * @param string $proto Type of external storage, should be a value in $wgExternalStores
+	 * @param array $params Associative array of ExternalStoreMedium parameters
+	 * @return ExternalStoreMedium|bool The store class or false on error
+	 */
+	public static function getStoreObject( $proto, array $params = [] ) {
+		$factory = MediaWikiServices::getInstance()->getExternalStoreFactory();
+
+		return $factory->getStoreObject( $proto, $params );
+	}
+
+	/**
+	 * Fetch data from given URL
+	 *
+	 * The protocol in front of "://" selects the store class; that store is then
+	 * handed the complete URL, not just the part after the separator.
+	 *
+	 * @param string $url The URL of the text to get
+	 * @param array $params Associative array of ExternalStoreMedium parameters
+	 * @return string|bool The text stored, or false if the URL is malformed,
+	 *  the protocol is not available, or the store reports an error
+	 * @throws MWException
+	 */
+	public static function fetchFromURL( $url, array $params = [] ) {
+		$parts = explode( '://', $url, 2 );
+		if ( count( $parts ) !== 2 || $parts[1] === '' ) {
+			return false; // no protocol separator, or nothing after it
+		}
+
+		$proto = $parts[0];
+		$store = self::getStoreObject( $proto, $params );
+		if ( $store === false ) {
+			return false; // protocol not available
+		}
+
+		return $store->fetchFromURL( $url );
+	}
+
+	/**
+	 * Fetch data from multiple URLs with a minimum of round trips
+	 *
+	 * URLs are grouped by scheme so that each store gets a single batched call.
+	 *
+	 * @param array $urls The URLs of the text to get
+	 * @return array Map from url to its data. Data is either string when found
+	 *  or false on failure.
+	 * @throws MWException
+	 */
+	public static function batchFetchFromURLs( array $urls ) {
+		$urlsByProto = [];
+		foreach ( $urls as $url ) {
+			$scheme = parse_url( $url, PHP_URL_SCHEME );
+			if ( $scheme ) {
+				$urlsByProto[$scheme][] = $url;
+			}
+		}
+
+		$retval = [];
+		foreach ( $urlsByProto as $proto => $protoUrls ) {
+			$store = self::getStoreObject( $proto );
+			if ( $store !== false ) {
+				$retval += $store->batchFetchFromURLs( $protoUrls );
+			}
+		}
+
+		// Anything not answered above (invalid, not found, db dead, etc.) maps to false
+		foreach ( array_diff( $urls, array_keys( $retval ) ) as $url ) {
+			$retval[$url] = false;
+		}
+
+		return $retval;
+	}
+
+	/**
+	 * Store a data item to an external store, identified by a partial URL
+	 *
+	 * The protocol part is used to identify the class, the rest is passed to the
+	 * class itself as a parameter.
+	 *
+	 * @param string $url A partial external store URL ("<store type>://<location>")
+	 * @param string $data
+	 * @param array $params Associative array of ExternalStoreMedium parameters
+	 * @return string|bool The URL of the stored data item, or false if the partial URL
+	 *  is malformed, the protocol is not available, or the store reports an error
+	 * @throws MWException
+	 */
+	public static function insert( $url, $data, array $params = [] ) {
+		$parts = explode( '://', $url, 2 );
+		if ( count( $parts ) !== 2 || $parts[1] === '' ) {
+			return false; // no protocol separator, or no location after it
+		}
+
+		list( $proto, $location ) = $parts;
+		$store = self::getStoreObject( $proto, $params );
+		if ( $store === false ) {
+			return false; // protocol not available
+		}
+
+		// Only the location is passed on; the store's return value is what
+		// callers receive as the URL of the stored item
+		return $store->store( $location, $data );
+	}
+
+	/**
+	 * Like insert() above, but does more of the work for us.
+	 * This function does not need a url param, it builds it by
+	 * itself. It also fails-over to the next possible clusters
+	 * provided by $wgDefaultExternalStore.
+	 *
+	 * @param string $data
+	 * @param array $params Map of ExternalStoreMedium::__construct context parameters
+	 * @return string|bool The URL of the stored data item; failure is reported by exception
+	 * @throws MWException
+	 */
+	public static function insertToDefault( $data, array $params = [] ) {
+		global $wgDefaultExternalStore;
+		$stores = (array)$wgDefaultExternalStore;
+		return self::insertWithFallback( $stores, $data, $params );
+	}
+
+	/**
+	 * Insert data into one of the given stores, failing over between them.
+	 * Stores are tried in random order until one accepts the write.
+	 *
+	 * @param array $tryStores Refer to $wgDefaultExternalStore
+	 * @param string $data
+	 * @param array $params Map of ExternalStoreMedium::__construct context parameters
+	 * @return string|bool The URL of the stored data item; failure is reported by exception
+	 * @throws MWException On an unavailable protocol, or if every store refused the write
+	 * @throws Exception The last exception thrown by a store, if all of them failed
+	 */
+	public static function insertWithFallback( array $tryStores, $data, array $params = [] ) {
+		$tryStores = array_values( $tryStores ); // random pick below needs consecutive keys
+		$error = false;
+		while ( count( $tryStores ) > 0 ) {
+			$index = mt_rand( 0, count( $tryStores ) - 1 );
+			$storeUrl = $tryStores[$index];
+			wfDebug( __METHOD__ . ": trying $storeUrl\n" );
+			// Pad so that a URL without "://" yields an empty path instead of a PHP notice
+			list( $proto, $path ) = array_pad( explode( '://', $storeUrl, 2 ), 2, '' );
+			$store = self::getStoreObject( $proto, $params );
+			if ( $store === false ) {
+				throw new MWException( "Invalid external storage protocol - $storeUrl" );
+			}
+
+			try {
+				if ( $store->isReadOnly( $path ) ) {
+					$msg = 'read only';
+				} else {
+					$url = $store->store( $path, $data );
+					if ( strlen( $url ) ) {
+						return $url; // a store accepted the write; done!
+					}
+					$msg = 'operation failed';
+				}
+			} catch ( Exception $error ) {
+				$msg = 'caught exception';
+			}
+
+			// Don't try this one again; array_splice() also keeps the keys consecutive
+			array_splice( $tryStores, $index, 1 );
+			wfDebugLog( 'ExternalStorage',
+				"Unable to store text to external storage $storeUrl ($msg)" );
+		}
+		// All stores failed
+		if ( $error ) {
+			throw $error; // rethrow the last error
+		}
+		throw new MWException( "Unable to store text to external storage" );
+	}
+
+	/**
+	 * @return bool Whether all the default insertion stores are marked as read-only
+	 * @throws MWException If a configured default store uses an unavailable protocol
+	 * @since 1.31
+	 */
+	public static function defaultStoresAreReadOnly() {
+		global $wgDefaultExternalStore;
+
+		// A false/empty setting must mean "no stores": (array)false would be [ false ]
+		$tryStores = $wgDefaultExternalStore ? (array)$wgDefaultExternalStore : [];
+		foreach ( $tryStores as $storeUrl ) {
+			list( $proto, $path ) = array_pad( explode( '://', $storeUrl, 2 ), 2, '' );
+			$store = self::getStoreObject( $proto, [] );
+			if ( $store === false ) {
+				throw new MWException( "Invalid external storage protocol - $storeUrl" );
+			}
+			if ( !$store->isReadOnly( $path ) ) {
+				return false; // at least one store is not read-only
+			}
+		}
+		return (bool)$tryStores; // with no stores, nothing can be "read only"
+	}
+
+	/**
+	 * @param string $data
+	 * @param string $wiki Domain ID handed on as the 'wiki' context parameter
+	 * @return string|bool The URL of the stored data item; failure is reported by exception
+	 * @throws MWException
+	 */
+	public static function insertToForeignDefault( $data, $wiki ) {
+		return self::insertToDefault( $data, [ 'wiki' => $wiki ] );
+	}
+}
diff --git a/www/wiki/includes/externalstore/ExternalStoreDB.php b/www/wiki/includes/externalstore/ExternalStoreDB.php
new file mode 100644
index 00000000..22a2d2bc
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStoreDB.php
@@ -0,0 +1,335 @@
+<?php
+/**
+ * External storage in SQL database.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Rdbms\ILoadBalancer;
+use Wikimedia\Rdbms\IDatabase;
+use Wikimedia\Rdbms\DBConnRef;
+use Wikimedia\Rdbms\MaintainableDBConnRef;
+use Wikimedia\Rdbms\DatabaseDomain;
+
+/**
+ * DB accessible external objects.
+ *
+ * In this system, each store "location" maps to a database "cluster".
+ * The clusters must be defined in the normal LBFactory configuration.
+ *
+ * @ingroup ExternalStorage
+ */
+class ExternalStoreDB extends ExternalStoreMedium {
+	/**
+	 * The provided URL is in the form of DB://cluster/id
+	 * or DB://cluster/id/itemid for concatenated storage.
+	 *
+	 * @param string $url
+	 * @return string|bool False if missing
+	 * @see ExternalStoreMedium::fetchFromURL()
+	 */
+	public function fetchFromURL( $url ) {
+		list( $cluster, $id, $itemID ) = $this->parseURL( $url );
+		$blob = $this->fetchBlob( $cluster, $id, $itemID );
+		if ( $itemID === false ) {
+			return $blob; // plain blob text, or false if missing
+		}
+
+		// A blob that failed to unserialize is not an object; report it as missing
+		return is_object( $blob ) ? $blob->getItem( $itemID ) : false;
+	}
+
+	/**
+	 * Fetch data from given external store URLs.
+	 * The provided URLs are in the form of DB://cluster/id
+	 * or DB://cluster/id/itemid for concatenated storage.
+	 *
+	 * @param array $urls An array of external store URLs
+	 * @return array A map from url to stored content. Failed results
+	 *     are not represented.
+	 */
+	public function batchFetchFromURLs( array $urls ) {
+		$batched = $inverseUrlMap = [];
+		foreach ( $urls as $url ) {
+			list( $cluster, $id, $itemID ) = $this->parseURL( $url );
+			$batched[$cluster][$id][] = $itemID;
+			// false $itemID gets cast to int, but should be ok
+			// since we do === from the $itemID in $batched
+			$inverseUrlMap[$cluster][$id][$itemID] = $url;
+		}
+		$ret = [];
+		foreach ( $batched as $cluster => $batchByCluster ) {
+			$res = $this->batchFetchBlobs( $cluster, $batchByCluster );
+			foreach ( $res as $id => $blob ) {
+				foreach ( $batchByCluster[$id] as $itemID ) {
+					$url = $inverseUrlMap[$cluster][$id][$itemID];
+					if ( $itemID === false ) {
+						$ret[$url] = $blob;
+					} elseif ( is_object( $blob ) ) {
+						$ret[$url] = $blob->getItem( $itemID );
+					}
+					// else: the blob could not be unserialized; leave the URL unrepresented
+				}
+			}
+		}
+
+		return $ret;
+	}
+
+	/**
+	 * @param string $location Cluster name
+	 * @param string $data
+	 * @return string URL of the inserted row, "DB://<location>/<insert ID>"
+	 * @throws MWException If the database reports no insert ID
+	 */
+	public function store( $location, $data ) {
+		$dbw = $this->getMaster( $location );
+		$dbw->insert( $this->getTable( $dbw ), [ 'blob_text' => $data ], __METHOD__ );
+		$id = $dbw->insertId();
+		if ( !$id ) {
+			throw new MWException( __METHOD__ . ': no insert ID' );
+		}
+		return "DB://$location/$id";
+	}
+
+	/**
+	 * @param string $location Cluster name
+	 * @return bool True when the cluster's load balancer reports a read-only reason
+	 */
+	public function isReadOnly( $location ) {
+		return ( $this->getLoadBalancer( $location )->getReadOnlyReason() !== false );
+	}
+
+	/**
+	 * Get a LoadBalancer for the specified cluster
+	 *
+	 * @param string $cluster Cluster name
+	 * @return ILoadBalancer
+	 */
+	private function getLoadBalancer( $cluster ) {
+		$services = MediaWikiServices::getInstance();
+		return $services->getDBLoadBalancerFactory()->getExternalLB( $cluster );
+	}
+
+	/**
+	 * Get a replica DB connection for the specified cluster
+	 *
+	 * @param string $cluster Cluster name
+	 * @return DBConnRef
+	 */
+	public function getSlave( $cluster ) {
+		global $wgDefaultExternalStore;
+
+		$lb = $this->getLoadBalancer( $cluster );
+		$domainId = $this->getDomainId( $lb->getServerInfo( $lb->getWriterIndex() ) );
+
+		if ( in_array( "DB://" . $cluster, (array)$wgDefaultExternalStore ) ) {
+			wfDebug( "writable external store\n" );
+		} else {
+			wfDebug( "read only external store\n" );
+			$lb->allowLagged( true ); // not a default write target, so lag is tolerated
+		}
+
+		$db = $lb->getConnectionRef( DB_REPLICA, [], $domainId );
+		$db->clearFlag( DBO_TRX ); // sanity
+
+		return $db;
+	}
+
+	/**
+	 * Get a master database connection for the specified cluster
+	 *
+	 * @param string $cluster Cluster name
+	 * @return MaintainableDBConnRef
+	 */
+	public function getMaster( $cluster ) {
+		$lb = $this->getLoadBalancer( $cluster );
+		$masterInfo = $lb->getServerInfo( $lb->getWriterIndex() );
+
+		$db = $lb->getMaintenanceConnectionRef( DB_MASTER, [], $this->getDomainId( $masterInfo ) );
+		$db->clearFlag( DBO_TRX ); // sanity
+
+		return $db;
+	}
+
+	/**
+	 * Work out which database domain the connections of this store should use.
+	 *
+	 * @param array $server Master DB server configuration array for LoadBalancer
+	 * @return string|bool Database domain ID or false
+	 */
+	private function getDomainId( array $server ) {
+		if ( isset( $this->params['wiki'] ) ) {
+			return $this->params['wiki']; // explicit domain
+		}
+		if ( !isset( $server['dbname'] ) ) {
+			return false; // local LB domain
+		}
+
+		// T200471: for b/c, treat any "dbname" field as forcing which database to use.
+		// MediaWiki/LoadBalancer previously did not enforce any concept of a local DB
+		// domain, but rather assumed that the LB server configuration matched $wgDBname.
+		// This check is useful when the external storage DB for this cluster does not use
+		// the same name as the corresponding "main" DB(s) for wikis.
+		$schema = $server['schema'] ?? null;
+		$domain = new DatabaseDomain( $server['dbname'], $schema, $server['tablePrefix'] ?? '' );
+
+		return $domain->getId();
+	}
+
+	/**
+	 * Get the 'blobs' table name for this database
+	 *
+	 * @param IDatabase $db
+	 * @return string Table name ('blobs' by default)
+	 */
+	public function getTable( $db ) {
+		// The connection's LB info may name a custom table; null means "not set"
+		return $db->getLBInfo( 'blobs table' ) ?? 'blobs';
+	}
+
+	/**
+	 * Fetch a blob item out of the database; a cache of the last-loaded
+	 * blob will be kept so that multiple loads out of a multi-item blob
+	 * can avoid redundant database access and decompression.
+	 * @param string $cluster
+	 * @param string $id
+	 * @param string|bool $itemID Item within the blob, or false for a plain-text blob
+	 * @return HistoryBlob|string|bool The blob (unserialized when $itemID is given);
+	 *  false if missing
+	 */
+	private function fetchBlob( $cluster, $id, $itemID ) {
+		// One-step cache holding only the most recently loaded blob. Operations that
+		// pull multiple revisions often hit the same blob repeatedly, and this saves
+		// the redundant unserialization and decompression.
+		static $externalBlobCache = [];
+
+		$cacheID = ( $itemID === false ) ? "$cluster/$id" : "$cluster/$id/";
+		if ( isset( $externalBlobCache[$cacheID] ) ) {
+			wfDebugLog( 'ExternalStoreDB-cache',
+				"ExternalStoreDB::fetchBlob cache hit on $cacheID" );
+
+			return $externalBlobCache[$cacheID];
+		}
+
+		wfDebugLog( 'ExternalStoreDB-cache',
+			"ExternalStoreDB::fetchBlob cache miss on $cacheID" );
+
+		$dbr = $this->getSlave( $cluster );
+		$ret = $dbr->selectField( $this->getTable( $dbr ),
+			'blob_text', [ 'blob_id' => $id ], __METHOD__ );
+		if ( $ret === false ) {
+			wfDebugLog( 'ExternalStoreDB',
+				"ExternalStoreDB::fetchBlob master fallback on $cacheID" );
+			// Try the master
+			$dbw = $this->getMaster( $cluster );
+			$ret = $dbw->selectField( $this->getTable( $dbw ),
+				'blob_text', [ 'blob_id' => $id ], __METHOD__ );
+			if ( $ret === false ) {
+				wfDebugLog( 'ExternalStoreDB',
+					"ExternalStoreDB::fetchBlob master failed to find $cacheID" );
+			}
+		}
+		if ( $itemID !== false && $ret !== false ) {
+			// Unserialise object; caller extracts item
+			$ret = unserialize( $ret );
+		}
+
+		$externalBlobCache = [ $cacheID => $ret ];
+
+		return $ret;
+	}
+
+	/**
+	 * Fetch multiple blob items out of the database
+	 *
+	 * @param string $cluster A cluster name valid for use with LBFactory
+	 * @param array $ids A map from the blob_id's to look for to the requested itemIDs in the blobs
+	 * @return array A map from the blob_id's requested to their content.
+	 *   Unlocated ids are not represented
+	 */
+	private function batchFetchBlobs( $cluster, array $ids ) {
+		$dbr = $this->getSlave( $cluster );
+		$res = $dbr->select( $this->getTable( $dbr ),
+			[ 'blob_id', 'blob_text' ], [ 'blob_id' => array_keys( $ids ) ], __METHOD__ );
+		$ret = [];
+		if ( $res !== false ) {
+			$this->mergeBatchResult( $ret, $ids, $res );
+		}
+		// mergeBatchResult() drops every id it found, so what is left was not on the replica
+		if ( $ids ) {
+			wfDebugLog( __CLASS__, __METHOD__ .
+				" master fallback on '$cluster' for: " .
+				implode( ',', array_keys( $ids ) ) );
+			// Try the master
+			$dbw = $this->getMaster( $cluster );
+			$res = $dbw->select( $this->getTable( $dbw ),
+				[ 'blob_id', 'blob_text' ],
+				[ 'blob_id' => array_keys( $ids ) ],
+				__METHOD__ );
+			if ( $res === false ) {
+				wfDebugLog( __CLASS__, __METHOD__ . " master failed on '$cluster'" );
+			} else {
+				$this->mergeBatchResult( $ret, $ids, $res );
+			}
+		}
+		if ( $ids ) {
+			wfDebugLog( __CLASS__, __METHOD__ .
+				" master on '$cluster' failed locating items: " .
+				implode( ',', array_keys( $ids ) ) );
+		}
+
+		return $ret;
+	}
+
+	/**
+	 * Helper function for self::batchFetchBlobs for merging master/replica DB results
+	 * @param array &$ret Current self::batchFetchBlobs return value
+	 * @param array &$ids Map from blob_id to requested itemIDs; found ids are removed
+	 * @param mixed $res DB result from Database::select
+	 */
+	private function mergeBatchResult( array &$ret, array &$ids, $res ) {
+		foreach ( $res as $row ) {
+			$id = $row->blob_id;
+			$itemIDs = $ids[$id];
+			unset( $ids[$id] ); // to track if everything is found
+			if ( count( $itemIDs ) === 1 && reset( $itemIDs ) === false ) {
+				// single result stored per blob
+				$ret[$id] = $row->blob_text;
+			} else {
+				// multi result stored per blob
+				$ret[$id] = unserialize( $row->blob_text );
+			}
+		}
+	}
+
+	/**
+	 * @param string $url URL of the form DB://cluster/id or DB://cluster/id/itemid
+	 * @return array [ cluster, id, itemID ]; itemID is false if absent, and a
+	 *  cluster or id that is missing from a malformed URL comes back as null
+	 */
+	protected function parseURL( $url ) {
+		$path = explode( '/', $url );
+		return [
+			$path[2] ?? null, // cluster
+			$path[3] ?? null, // id
+			$path[4] ?? false // itemID
+		];
+	}
+}
diff --git a/www/wiki/includes/externalstore/ExternalStoreFactory.php b/www/wiki/includes/externalstore/ExternalStoreFactory.php
new file mode 100644
index 00000000..940fb2e2
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStoreFactory.php
@@ -0,0 +1,42 @@
+<?php
+/**
+ * @defgroup ExternalStorage ExternalStorage
+ */
+
+/**
+ * @ingroup ExternalStorage
+ */
+class ExternalStoreFactory {
+
+	/** @var string[] Lower-cased names of the enabled protocols */
+	private $externalStores;
+
+	/**
+	 * @param array $externalStores See $wgExternalStores
+	 */
+	public function __construct( array $externalStores ) {
+		$this->externalStores = array_map( 'strtolower', $externalStores );
+	}
+
+	/**
+	 * Instantiate the store class that belongs to a protocol.
+	 *
+	 * @param string $proto Type of external storage, should be a value in $wgExternalStores;
+	 *  it is compared case-insensitively
+	 * @param array $params Associative array of ExternalStoreMedium parameters
+	 * @return ExternalStoreMedium|bool The store class or false on error
+	 */
+	public function getStoreObject( $proto, array $params = [] ) {
+		$wanted = strtolower( $proto );
+		if ( !$this->externalStores || !in_array( $wanted, $this->externalStores ) ) {
+			return false; // protocol not enabled
+		}
+		// Any custom modules should be added to $wgAutoLoadClasses for on-demand loading
+		$class = 'ExternalStore' . ucfirst( $proto );
+		if ( !class_exists( $class ) ) {
+			return false;
+		}
+		return new $class( $params );
+	}
+
+}
diff --git a/www/wiki/includes/externalstore/ExternalStoreHttp.php b/www/wiki/includes/externalstore/ExternalStoreHttp.php
new file mode 100644
index 00000000..879686f7
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStoreHttp.php
@@ -0,0 +1,41 @@
+<?php
+/**
+ * External storage using HTTP requests.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Example class for HTTP accessible external objects.
+ * Only supports reading, not storing.
+ *
+ * @ingroup ExternalStorage
+ */
+class ExternalStoreHttp extends ExternalStoreMedium {
+	/** Fetch the URL with an Http::get() request. */
+	public function fetchFromURL( $url ) {
+		return Http::get( $url, [], __METHOD__ );
+	}
+	/** Always throws, since this medium cannot be written to. */
+	public function store( $location, $data ) {
+		throw new MWException( 'ExternalStoreHttp is read-only and does not support store().' );
+	}
+	public function isReadOnly( $location ) {
+		return true; // store() is unsupported, so every location counts as read-only
+	}
+}
diff --git a/www/wiki/includes/externalstore/ExternalStoreMedium.php b/www/wiki/includes/externalstore/ExternalStoreMedium.php
new file mode 100644
index 00000000..da7752b7
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStoreMedium.php
@@ -0,0 +1,91 @@
+<?php
+/**
+ * External storage in some particular medium.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup ExternalStorage
+ */
+
+/**
+ * Accessible external objects in a particular storage medium
+ *
+ * @ingroup ExternalStorage
+ * @since 1.21
+ */
+abstract class ExternalStoreMedium {
+	/** @var array Usage context options, as given to the constructor */
+	protected $params = [];
+
+	/**
+	 * @param array $params Usage context options:
+	 *   - wiki: the domain ID of the wiki this is being used for [optional]
+	 */
+	public function __construct( array $params = [] ) {
+		$this->params = $params;
+	}
+
+	/**
+	 * Fetch the data stored under a single external store URL
+	 *
+	 * @param string $url An external store URL
+	 * @return string|bool The text stored or false on error
+	 * @throws MWException
+	 */
+	abstract public function fetchFromURL( $url );
+
+	/**
+	 * Fetch data from given external store URLs, one fetchFromURL() call per URL.
+	 *
+	 * @param array $urls A list of external store URLs
+	 * @return array Map from the url to the text stored. Unfound data is not represented
+	 */
+	public function batchFetchFromURLs( array $urls ) {
+		$found = [];
+		foreach ( $urls as $url ) {
+			$text = $this->fetchFromURL( $url );
+			if ( $text === false ) {
+				// Failures are simply left out, which keeps implementations simple;
+				// ExternalStore::batchFetchFromURLs deals with the URLs that errored
+				continue;
+			}
+			$found[$url] = $text;
+		}
+		return $found;
+	}
+
+	/**
+	 * Write a data item to the given location of this medium
+	 *
+	 * @param string $location The location name
+	 * @param string $data The data item
+	 * @return string|bool The URL of the stored data item, or false on error
+	 * @throws MWException
+	 */
+	abstract public function store( $location, $data );
+
+	/**
+	 * Tell whether a location refuses writes; this base version always says no.
+	 *
+	 * @param string $location The location name
+	 * @return bool Whether this location is read-only
+	 * @since 1.31
+	 */
+	public function isReadOnly( $location ) {
+		return false;
+	}
+}
diff --git a/www/wiki/includes/externalstore/ExternalStoreMwstore.php b/www/wiki/includes/externalstore/ExternalStoreMwstore.php
new file mode 100644
index 00000000..5d7155e8
--- /dev/null
+++ b/www/wiki/includes/externalstore/ExternalStoreMwstore.php
@@ -0,0 +1,105 @@
+<?php
+/**
+ * External storage in a file backend.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * File backend accessible external objects.
+ *
+ * In this system, each store "location" maps to the name of a file backend.
+ * The file backends must be defined in $wgFileBackends and must be global
+ * and fully qualified with a global "wikiId" prefix in the configuration.
+ *
+ * @ingroup ExternalStorage
+ * @since 1.21
+ */
+class ExternalStoreMwstore extends ExternalStoreMedium {
+	/**
+	 * @see ExternalStoreMedium::fetchFromURL()
+	 * @param string $url URL of the form mwstore://backend/container/wiki/id
+	 * @return string|bool The backend's getFileContents() result; false if no backend matches
+	 */
+	public function fetchFromURL( $url ) {
+		$backend = FileBackendGroup::singleton()->backendFromPath( $url );
+		if ( !( $backend instanceof FileBackend ) ) {
+			return false;
+		}
+		// We don't need "latest" since objects are immutable and
+		// backends should at least have "read-after-create" consistency.
+		return $backend->getFileContents( [ 'src' => $url ] );
+	}
+
+	/**
+	 * Fetch data from given external store URLs, with one batched read per backend.
+	 *
+	 * @param array $urls URLs of the form mwstore://backend/container/wiki/id
+	 * @return array Map from url to content; URLs that no backend claims are left out
+	 */
+	public function batchFetchFromURLs( array $urls ) {
+		$urlsByBackend = [];
+		foreach ( $urls as $url ) {
+			$backend = FileBackendGroup::singleton()->backendFromPath( $url );
+			if ( $backend instanceof FileBackend ) {
+				$urlsByBackend[$backend->getName()][] = $url;
+			}
+		}
+
+		$contents = [];
+		foreach ( $urlsByBackend as $name => $backendUrls ) {
+			$backend = FileBackendGroup::singleton()->get( $name );
+			$contents += $backend->getFileContentsMulti( [ 'srcs' => $backendUrls ] );
+		}
+
+		return $contents;
+	}
+
+	/**
+	 * @param string $backend Name of the file backend to write to
+	 * @param string $data
+	 * @return string|bool Storage path of the created object, or false on failure
+	 */
+	public function store( $backend, $data ) {
+		$be = FileBackendGroup::singleton()->get( $backend );
+		if ( !( $be instanceof FileBackend ) ) {
+			return false;
+		}
+		// Get three random base 36 characters to act as shard directories
+		$rand = Wikimedia\base_convert( mt_rand( 0, 46655 ), 10, 36, 3 );
+		// Make sure ID is roughly lexicographically increasing for performance
+		$id = str_pad( UIDGenerator::newTimestampedUID128( 32 ), 26, '0', STR_PAD_LEFT );
+		// Segregate items by wiki ID for the sake of bookkeeping
+		$wiki = $this->params['wiki'] ?? wfWikiID();
+
+		$url = $be->getContainerStoragePath( 'data' ) . '/' . rawurlencode( $wiki );
+		if ( $be instanceof FSFileBackend ) {
+			$url .= "/{$rand[0]}/{$rand[1]}/{$rand[2]}/{$id}"; // keep directories small
+		} else {
+			$url .= "/{$rand[0]}/{$rand[1]}/{$id}"; // container sharding is only 2-levels
+		}
+		$be->prepare( [ 'dir' => dirname( $url ), 'noAccess' => 1, 'noListing' => 1 ] );
+		$created = $be->create( [ 'dst' => $url, 'content' => $data ] )->isOK();
+		return $created ? $url : false;
+	}
+
+	public function isReadOnly( $backend ) {
+		$be = FileBackendGroup::singleton()->get( $backend );
+		return $be ? $be->isReadOnly() : false; // no backend object => not read-only
+	}
+}