diff options
Diffstat (limited to 'www/wiki/includes/Storage/SqlBlobStore.php')
-rw-r--r-- | www/wiki/includes/Storage/SqlBlobStore.php | 600 |
1 files changed, 600 insertions, 0 deletions
diff --git a/www/wiki/includes/Storage/SqlBlobStore.php b/www/wiki/includes/Storage/SqlBlobStore.php new file mode 100644 index 00000000..0ff7c133 --- /dev/null +++ b/www/wiki/includes/Storage/SqlBlobStore.php @@ -0,0 +1,600 @@ +<?php +/** + * Service for storing and loading data blobs representing revision content. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * Attribution notice: when this file was created, much of its content was taken + * from the Revision.php file as present in release 1.30. Refer to the history + * of that file for original authorship. + * + * @file + */ + +namespace MediaWiki\Storage; + +use DBAccessObjectUtils; +use ExternalStore; +use IDBAccessObject; +use IExpiringStore; +use InvalidArgumentException; +use Language; +use MWException; +use WANObjectCache; +use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * Service for storing and loading Content objects. + * + * @since 1.31 + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. + */ +class SqlBlobStore implements IDBAccessObject, BlobStore { + + // Note: the name has been taken unchanged from the Revision class. + const TEXT_CACHE_GROUP = 'revisiontext:10'; + + /** + * @var LoadBalancer + */ + private $dbLoadBalancer; + + /** + * @var WANObjectCache + */ + private $cache; + + /** + * @var bool|string Wiki ID + */ + private $wikiId; + + /** + * @var int + */ + private $cacheExpiry = 604800; // 7 days + + /** + * @var bool + */ + private $compressBlobs = false; + + /** + * @var bool|string + */ + private $legacyEncoding = false; + + /** + * @var Language|null + */ + private $legacyEncodingConversionLang = null; + + /** + * @var boolean + */ + private $useExternalStore = false; + + /** + * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections + * @param WANObjectCache $cache A cache manager for caching blobs + * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki. + */ + public function __construct( + LoadBalancer $dbLoadBalancer, + WANObjectCache $cache, + $wikiId = false + ) { + $this->dbLoadBalancer = $dbLoadBalancer; + $this->cache = $cache; + $this->wikiId = $wikiId; + } + + /** + * @return int time for which blobs can be cached, in seconds + */ + public function getCacheExpiry() { + return $this->cacheExpiry; + } + + /** + * @param int $cacheExpiry time for which blobs can be cached, in seconds + */ + public function setCacheExpiry( $cacheExpiry ) { + Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' ); + + $this->cacheExpiry = $cacheExpiry; + } + + /** + * @return bool whether blobs should be compressed for storage + */ + public function getCompressBlobs() { + return $this->compressBlobs; + } + + /** + * @param bool $compressBlobs whether blobs should be compressed for storage + */ + public function setCompressBlobs( $compressBlobs ) { + $this->compressBlobs = $compressBlobs; + } + + /** + * @return false|string The legacy encoding to assume for blobs that are not marked as utf8. + * False means handling of legacy encoding is disabled, and utf8 assumed. + */ + public function getLegacyEncoding() { + return $this->legacyEncoding; + } + + /** + * @return Language|null The locale to use when decoding from a legacy encoding, or null + * if handling of legacy encoding is disabled. + */ + public function getLegacyEncodingConversionLang() { + return $this->legacyEncodingConversionLang; + } + + /** + * @param string $legacyEncoding The legacy encoding to assume for blobs that are + * not marked as utf8. + * @param Language $language The locale to use when decoding from a legacy encoding. + */ + public function setLegacyEncoding( $legacyEncoding, Language $language ) { + Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' ); + + $this->legacyEncoding = $legacyEncoding; + $this->legacyEncodingConversionLang = $language; + } + + /** + * @return bool Whether to use the ExternalStore mechanism for storing blobs. + */ + public function getUseExternalStore() { + return $this->useExternalStore; + } + + /** + * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs. + */ + public function setUseExternalStore( $useExternalStore ) { + Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' ); + + $this->useExternalStore = $useExternalStore; + } + + /** + * @return LoadBalancer + */ + private function getDBLoadBalancer() { + return $this->dbLoadBalancer; + } + + /** + * @param int $index A database index, like DB_MASTER or DB_REPLICA + * + * @return IDatabase + */ + private function getDBConnection( $index ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnection( $index, [], $this->wikiId ); + } + + /** + * Stores an arbitrary blob of data and returns an address that can be used with + * getBlob() to retrieve the same blob of data, + * + * @param string $data + * @param array $hints An array of hints. + * + * @throws BlobAccessException + * @return string an address that can be used with getBlob() to retrieve the data. + */ + public function storeBlob( $data, $hints = [] ) { + try { + $flags = $this->compressData( $data ); + + # Write to external storage if required + if ( $this->useExternalStore ) { + // Store and get the URL + $data = ExternalStore::insertToDefault( $data ); + if ( !$data ) { + throw new BlobAccessException( "Failed to store text to external storage" ); + } + if ( $flags ) { + $flags .= ','; + } + $flags .= 'external'; + + // TODO: we could also return an address for the external store directly here. + // That would mean bypassing the text table entirely when the external store is + // used. We'll need to assess expected fallout before doing that. + } + + $dbw = $this->getDBConnection( DB_MASTER ); + + $old_id = $dbw->nextSequenceValue( 'text_old_id_seq' ); + $dbw->insert( + 'text', + [ + 'old_id' => $old_id, + 'old_text' => $data, + 'old_flags' => $flags, + ], + __METHOD__ + ); + + $textId = $dbw->insertId(); + + return 'tt:' . $textId; + } catch ( MWException $e ) { + throw new BlobAccessException( $e->getMessage(), 0, $e ); + } + } + + /** + * Retrieve a blob, given an address. + * Currently hardcoded to the 'text' table storage engine. + * + * MCR migration note: this replaces Revision::loadText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throws BlobAccessException + * @return string + */ + public function getBlob( $blobAddress, $queryFlags = 0 ) { + Assert::parameterType( 'string', $blobAddress, '$blobAddress' ); + + // No negative caching; negative hits on text rows may be due to corrupted replica DBs + $blob = $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! + $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress ), + $this->getCacheTTL(), + function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) { + list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags ); + $setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) ); + + return $this->fetchBlob( $blobAddress, $queryFlags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ] + ); + + if ( $blob === false ) { + throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress ); + } + + return $blob; + } + + /** + * MCR migration note: this corresponds to Revision::fetchText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throw BlobAccessException + * @return string|false + */ + private function fetchBlob( $blobAddress, $queryFlags ) { + list( $schema, $id, ) = self::splitBlobAddress( $blobAddress ); + + //TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL! + if ( $schema === 'tt' ) { + $textId = intval( $id ); + } else { + // XXX: change to better exceptions! That makes migration more difficult, though. + throw new BlobAccessException( "Unknown blob address schema: $schema" ); + } + + if ( !$textId || $id !== (string)$textId ) { + // XXX: change to better exceptions! That makes migration more difficult, though. + throw new BlobAccessException( "Bad blob address: $blobAddress" ); + } + + // Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables + // do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases. + $queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) + ? self::READ_LATEST_IMMUTABLE + : 0; + + list( $index, $options, $fallbackIndex, $fallbackOptions ) = + DBAccessObjectUtils::getDBOptions( $queryFlags ); + + // Text data is immutable; check replica DBs first. + $row = $this->getDBConnection( $index )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $options + ); + + // Fallback to DB_MASTER in some cases if the row was not found, using the appropriate + // options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ. + if ( !$row && $fallbackIndex !== null ) { + $row = $this->getDBConnection( $fallbackIndex )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $fallbackOptions + ); + } + + if ( !$row ) { + wfWarn( __METHOD__ . ": No text row with ID $textId." ); + return false; + } + + $blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress ); + + if ( $blob === false ) { + wfWarn( __METHOD__ . ": Bad data in text row $textId." ); + return false; + } + + return $blob; + } + + /** + * Expand a raw data blob according to the flags given. + * + * MCR migration note: this replaces Revision::getRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param string $raw The raw blob data, to be processed according to $flags. + * May be the blob itself, or the blob compressed, or just the address + * of the actual blob, depending on $flags. + * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'. + * Note that not including 'utf-8' in $flags will cause the data to be decoded + * according to the legacy encoding specified via setLegacyEncoding. + * @param string|null $cacheKey May be used for caching if given + * + * @return false|string The expanded blob or false on failure + */ + public function expandBlob( $raw, $flags, $cacheKey = null ) { + if ( is_string( $flags ) ) { + $flags = explode( ',', $flags ); + } + + // Use external methods for external objects, text in table is URL-only then + if ( in_array( 'external', $flags ) ) { + $url = $raw; + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) == 1 || $parts[1] == '' ) { + return false; + } + + if ( $cacheKey && $this->wikiId === false ) { + // Make use of the wiki-local revision text cache. + // The cached value should be decompressed, so handle that and return here. + // NOTE: we rely on $this->cache being the right cache for $this->wikiId! + return $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! + $this->cache->makeKey( 'revisiontext', 'textid', $cacheKey ), + $this->getCacheTTL(), + function () use ( $url, $flags ) { + // No negative caching per BlobStore::getBlob() + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + + return $this->decompressData( $blob, $flags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ] + ); + } else { + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + return $this->decompressData( $blob, $flags ); + } + } else { + return $this->decompressData( $raw, $flags ); + } + } + + /** + * If $wgCompressRevisions is enabled, we will compress data. + * The input string is modified in place. + * Return value is the flags field: contains 'gzip' if the + * data is compressed, and 'utf-8' if we're saving in UTF-8 + * mode. + * + * MCR migration note: this replaces Revision::compressRevisionText + * + * @note direct use is deprecated! + * @todo make this private, there should be no need to use this method outside this class. + * + * @param mixed &$blob Reference to a text + * + * @return string + */ + public function compressData( &$blob ) { + $blobFlags = []; + + // Revisions not marked as UTF-8 will have legacy decoding applied by decompressData(). + // XXX: if $this->legacyEncoding is not set, we could skip this. That would however be + // risky, since $this->legacyEncoding being set in the future would lead to data corruption. + $blobFlags[] = 'utf-8'; + + if ( $this->compressBlobs ) { + if ( function_exists( 'gzdeflate' ) ) { + $deflated = gzdeflate( $blob ); + + if ( $deflated === false ) { + wfLogWarning( __METHOD__ . ': gzdeflate() failed' ); + } else { + $blob = $deflated; + $blobFlags[] = 'gzip'; + } + } else { + wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" ); + } + } + return implode( ',', $blobFlags ); + } + + /** + * Re-converts revision text according to its flags. + * + * MCR migration note: this replaces Revision::decompressRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param mixed $blob Reference to a text + * @param array $blobFlags Compression flags, such as 'gzip'. + * Note that not including 'utf-8' in $blobFlags will cause the data to be decoded + * according to the legacy encoding specified via setLegacyEncoding. + * + * @return string|bool Decompressed text, or false on failure + */ + public function decompressData( $blob, array $blobFlags ) { + if ( $blob === false ) { + // Text failed to be fetched; nothing to do + return false; + } + + if ( in_array( 'error', $blobFlags ) ) { + // Error row, return false + return false; + } + + if ( in_array( 'gzip', $blobFlags ) ) { + # Deal with optional compression of archived pages. + # This can be done periodically via maintenance/compressOld.php, and + # as pages are saved if $wgCompressRevisions is set. + $blob = gzinflate( $blob ); + + if ( $blob === false ) { + wfLogWarning( __METHOD__ . ': gzinflate() failed' ); + return false; + } + } + + if ( in_array( 'object', $blobFlags ) ) { + # Generic compressed storage + $obj = unserialize( $blob ); + if ( !is_object( $obj ) ) { + // Invalid object + return false; + } + $blob = $obj->getText(); + } + + // Needed to support old revisions left over from from the 1.4 / 1.5 migration. + if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang + && !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags ) + ) { + # Old revisions kept around in a legacy encoding? + # Upconvert on demand. + # ("utf8" checked for compatibility with some broken + # conversion scripts 2008-12-30) + $blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob ); + } + + return $blob; + } + + /** + * Get the text cache TTL + * + * MCR migration note: this replaces Revision::getCacheTTL + * + * @return int + */ + private function getCacheTTL() { + if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION ) + <= WANObjectCache::QOS_EMULATION_SQL + ) { + // Do not cache RDBMs blobs in...the RDBMs store + $ttl = WANObjectCache::TTL_UNCACHEABLE; + } else { + $ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE; + } + + return $ttl; + } + + /** + * Returns an ID corresponding to the old_id field in the text table, corresponding + * to the given $address. + * + * Currently, $address must start with 'tt:' followed by a decimal integer representing + * the old_id; if $address does not start with 'tt:', null is returned. However, + * the implementation may change to insert rows into the text table on the fly. + * + * @note This method exists for use with the text table based storage schema. + * It should not be assumed that is will function with all future kinds of content addresses. + * + * @deprecated since 1.31, so not assume that all blob addresses refer to a row in the text + * table. This method should become private once the relevant refactoring in WikiPage is + * complete. + * + * @param string $address + * + * @return int|null + */ + public function getTextIdFromAddress( $address ) { + list( $schema, $id, ) = self::splitBlobAddress( $address ); + + if ( $schema !== 'tt' ) { + return null; + } + + $textId = intval( $id ); + + if ( !$textId || $id !== (string)$textId ) { + throw new InvalidArgumentException( "Malformed text_id: $id" ); + } + + return $textId; + } + + /** + * Splits a blob address into three parts: the schema, the ID, and parameters/flags. + * + * @param string $address + * + * @throws InvalidArgumentException + * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array. + */ + private static function splitBlobAddress( $address ) { + if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m ) ) { + throw new InvalidArgumentException( "Bad blob address: $address" ); + } + + $schema = strtolower( $m[1] ); + $id = $m[2]; + $parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : []; + + return [ $schema, $id, $parameters ]; + } + + public function isReadOnly() { + if ( $this->useExternalStore && ExternalStore::defaultStoresAreReadOnly() ) { + return true; + } + + return ( $this->getDBLoadBalancer()->getReadOnlyReason() !== false ); + } +} |