diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/Storage |
first commit
Diffstat (limited to 'www/wiki/includes/Storage')
19 files changed, 5706 insertions, 0 deletions
diff --git a/www/wiki/includes/Storage/BlobAccessException.php b/www/wiki/includes/Storage/BlobAccessException.php new file mode 100644 index 00000000..ffc5ecab --- /dev/null +++ b/www/wiki/includes/Storage/BlobAccessException.php @@ -0,0 +1,34 @@ +<?php +/** + * Exception representing a failure to access a data blob. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use RuntimeException; + +/** + * Exception representing a failure to access a data blob. + * + * @since 1.31 + */ +class BlobAccessException extends RuntimeException { + +} diff --git a/www/wiki/includes/Storage/BlobStore.php b/www/wiki/includes/Storage/BlobStore.php new file mode 100644 index 00000000..8b1112b2 --- /dev/null +++ b/www/wiki/includes/Storage/BlobStore.php @@ -0,0 +1,119 @@ +<?php +/** + * Service for loading and storing data blobs. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +/** + * Service for loading and storing data blobs. + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. + * + * @since 1.31 + */ +interface BlobStore { + + /** + * Hint key for use with storeBlob, indicating the general role the block + * takes in the application. For instance, it should be "page-content" if + * the blob represents a Content object. + */ + const DESIGNATION_HINT = 'designation'; + + /** + * Hint key for use with storeBlob, indicating the page the blob is associated with. + * This may be used for sharding. + */ + const PAGE_HINT = 'page_id'; + + /** + * Hint key for use with storeBlob, indicating the slot the blob is associated with. + * May be relevant for reference counting. + */ + const ROLE_HINT = 'role_name'; + + /** + * Hint key for use with storeBlob, indicating the revision the blob is associated with. + * This may be used for differential storage and reference counting. + */ + const REVISION_HINT = 'rev_id'; + + /** + * Hint key for use with storeBlob, indicating the parent revision of the revision + * the blob is associated with. This may be used for differential storage. + */ + const PARENT_HINT = 'rev_parent_id'; + + /** + * Hint key for use with storeBlob, providing the SHA1 hash of the blob as passed to the + * method. This can be used to avoid re-calculating the hash if it is needed by the BlobStore. 
+ */ + const SHA1_HINT = 'cont_sha1'; + + /** + * Hint key for use with storeBlob, indicating the model of the content encoded in the + * given blob. May be used to implement optimized storage for some well known models. + */ + const MODEL_HINT = 'cont_model'; + + /** + * Hint key for use with storeBlob, indicating the serialization format used to create + * the blob, as a MIME type. May be used for optimized storage in the underlying database. + */ + const FORMAT_HINT = 'cont_format'; + + /** + * Retrieve a blob, given an address. + * + * MCR migration note: this replaces Revision::loadText + * + * @param string $blobAddress The blob address as returned by storeBlob(), + * such as "tt:12345" or "ex:DB://s16/456/9876". + * @param int $queryFlags See IDBAccessObject. + * + * @throws BlobAccessException + * @return string binary blob data + */ + public function getBlob( $blobAddress, $queryFlags = 0 ); + + /** + * Stores an arbitrary blob of data and returns an address that can be used with + * getBlob() to retrieve the same blob of data, + * + * @param string $data raw binary data + * @param array $hints An array of hints. Implementations may use the hints to optimize storage. + * All hints are optional, supported hints depend on the implementation. Hint names by + * convention correspond to the names of fields in the database. Callers are encouraged to + * provide the well known hints as defined by the XXX_HINT constants. + * + * @throws BlobAccessException + * @return string an address that can be used with getBlob() to retrieve the data. 
+ */ + public function storeBlob( $data, $hints = [] ); + + /** + * Check if the blob metadata or backing blob data store is read-only + * + * @return bool + */ + public function isReadOnly(); +} diff --git a/www/wiki/includes/Storage/BlobStoreFactory.php b/www/wiki/includes/Storage/BlobStoreFactory.php new file mode 100644 index 00000000..63ca74de --- /dev/null +++ b/www/wiki/includes/Storage/BlobStoreFactory.php @@ -0,0 +1,105 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use Config; +use Language; +use WANObjectCache; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * Service for instantiating BlobStores + * + * This can be used to create BlobStore objects for other wikis. 
+ * + * @since 1.31 + */ +class BlobStoreFactory { + + /** + * @var LoadBalancer + */ + private $loadBalancer; + + /** + * @var WANObjectCache + */ + private $cache; + + /** + * @var Config + */ + private $config; + + /** + * @var Language + */ + private $contLang; + + public function __construct( + LoadBalancer $loadBalancer, + WANObjectCache $cache, + Config $mainConfig, + Language $contLang + ) { + $this->loadBalancer = $loadBalancer; + $this->cache = $cache; + $this->config = $mainConfig; + $this->contLang = $contLang; + } + + /** + * @since 1.31 + * + * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki. + * + * @return BlobStore + */ + public function newBlobStore( $wikiId = false ) { + return $this->newSqlBlobStore( $wikiId ); + } + + /** + * @internal Please call newBlobStore and use the BlobStore interface. + * + * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki. + * + * @return SqlBlobStore + */ + public function newSqlBlobStore( $wikiId = false ) { + $store = new SqlBlobStore( + $this->loadBalancer, + $this->cache, + $wikiId + ); + + $store->setCompressBlobs( $this->config->get( 'CompressRevisions' ) ); + $store->setCacheExpiry( $this->config->get( 'RevisionCacheExpiry' ) ); + $store->setUseExternalStore( $this->config->get( 'DefaultExternalStore' ) !== false ); + + if ( $this->config->get( 'LegacyEncoding' ) ) { + $store->setLegacyEncoding( $this->config->get( 'LegacyEncoding' ), $this->contLang ); + } + + return $store; + } + +} diff --git a/www/wiki/includes/Storage/IncompleteRevisionException.php b/www/wiki/includes/Storage/IncompleteRevisionException.php new file mode 100644 index 00000000..bf45b012 --- /dev/null +++ b/www/wiki/includes/Storage/IncompleteRevisionException.php @@ -0,0 +1,32 @@ +<?php +/** + * Exception representing a failure to look up a revision. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +/** + * Exception thrown when trying to access undefined fields on an incomplete RevisionRecord. + * + * @since 1.31 + */ +class IncompleteRevisionException extends RevisionAccessException { + +} diff --git a/www/wiki/includes/Storage/MutableRevisionRecord.php b/www/wiki/includes/Storage/MutableRevisionRecord.php new file mode 100644 index 00000000..a259ae0b --- /dev/null +++ b/www/wiki/includes/Storage/MutableRevisionRecord.php @@ -0,0 +1,328 @@ +<?php +/** + * Mutable RevisionRecord implementation, for building new revision entries programmatically. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use CommentStoreComment; +use Content; +use InvalidArgumentException; +use MediaWiki\User\UserIdentity; +use MWException; +use Title; +use Wikimedia\Assert\Assert; + +/** + * Mutable RevisionRecord implementation, for building new revision entries programmatically. + * Provides setters for all fields. + * + * @since 1.31 + */ +class MutableRevisionRecord extends RevisionRecord { + + /** + * Returns an incomplete MutableRevisionRecord which uses $parent as its + * parent revision, and inherits all slots from it. If saved unchanged, + * the new revision will act as a null-revision. + * + * @param RevisionRecord $parent + * @param CommentStoreComment $comment + * @param UserIdentity $user + * @param string $timestamp + * + * @return MutableRevisionRecord + */ + public static function newFromParentRevision( + RevisionRecord $parent, + CommentStoreComment $comment, + UserIdentity $user, + $timestamp + ) { + // TODO: ideally, we wouldn't need a Title here + $title = Title::newFromLinkTarget( $parent->getPageAsLinkTarget() ); + $rev = new MutableRevisionRecord( $title, $parent->getWikiId() ); + + $rev->setComment( $comment ); + $rev->setUser( $user ); + $rev->setTimestamp( $timestamp ); + + foreach ( $parent->getSlotRoles() as $role ) { + $slot = $parent->getSlot( $role, self::RAW ); + $rev->inheritSlot( $slot ); + } + + $rev->setPageId( $parent->getPageId() ); + $rev->setParentId( $parent->getId() ); + + return $rev; + } + + /** + * @note Avoid calling this constructor directly. Use the appropriate methods + * in RevisionStore instead. + * + * @param Title $title The title of the page this Revision is associated with.
+ * @param bool|string $wikiId the wiki ID of the site this Revision belongs to, + * or false for the local site. + * + * @throws MWException + */ + function __construct( Title $title, $wikiId = false ) { + $slots = new MutableRevisionSlots(); + + parent::__construct( $title, $slots, $wikiId ); + + $this->mSlots = $slots; // redundant, but nice for static analysis + } + + /** + * @param int $parentId + */ + public function setParentId( $parentId ) { + Assert::parameterType( 'integer', $parentId, '$parentId' ); + + $this->mParentId = $parentId; + } + + /** + * Sets the given slot. If a slot with the same role is already present in the revision, + * it is replaced. + * + * @note This can only be used with a fresh "unattached" SlotRecord. Calling code that has a + * SlotRecord from another revision should use inheritSlot(). Calling code that has access to + * a Content object can use setContent(). + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param SlotRecord $slot + */ + public function setSlot( SlotRecord $slot ) { + if ( $slot->hasRevision() && $slot->getRevision() !== $this->getId() ) { + throw new InvalidArgumentException( + 'The given slot must be an unsaved, unattached one. ' + . 'This slot is already attached to revision ' . $slot->getRevision() . '. ' + . 'Use inheritSlot() instead to preserve a slot from a previous revision.' + ); + } + + $this->mSlots->setSlot( $slot ); + $this->resetAggregateValues(); + } + + /** + * "Inherits" the given slot's content. + * + * If a slot with the same role is already present in the revision, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. 
+ * + * @param SlotRecord $parentSlot + */ + public function inheritSlot( SlotRecord $parentSlot ) { + $slot = SlotRecord::newInherited( $parentSlot ); + $this->setSlot( $slot ); + } + + /** + * Sets the content for the slot with the given role. + * + * If a slot with the same role is already present in the revision, it is replaced. + * Calling code that has access to a SlotRecord can use inheritSlot() instead. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param string $role + * @param Content $content + */ + public function setContent( $role, Content $content ) { + $this->mSlots->setContent( $role, $content ); + $this->resetAggregateValues(); + } + + /** + * Removes the slot with the given role from this revision. + * This effectively ends the "stream" with that role on the revision's page. + * Future revisions will no longer inherit this slot, unless it is added back explicitly. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @note Calling this method will cause the revision size and hash to be re-calculated upon + * the next call to getSize() and getSha1(), respectively. + * + * @param string $role + */ + public function removeSlot( $role ) { + $this->mSlots->removeSlot( $role ); + $this->resetAggregateValues(); + } + + /** + * @param CommentStoreComment $comment + */ + public function setComment( CommentStoreComment $comment ) { + $this->mComment = $comment; + } + + /** + * Set revision hash, for optimization. Prevents getSha1() from re-calculating the hash. + * + * @note This should only be used if the calling code is sure that the given hash is correct + * for the revision's content, and there is no chance of the content being manipulated + * later. When in doubt, this method should not be called. 
+ * + * @param string $sha1 SHA1 hash as a base36 string. + */ + public function setSha1( $sha1 ) { + Assert::parameterType( 'string', $sha1, '$sha1' ); + + $this->mSha1 = $sha1; + } + + /** + * Set nominal revision size, for optimization. Prevents getSize() from re-calculating the size. + * + * @note This should only be used if the calling code is sure that the given size is correct + * for the revision's content, and there is no chance of the content being manipulated + * later. When in doubt, this method should not be called. + * + * @param int $size nominal size in bogo-bytes + */ + public function setSize( $size ) { + Assert::parameterType( 'integer', $size, '$size' ); + + $this->mSize = $size; + } + + /** + * @param int $visibility + */ + public function setVisibility( $visibility ) { + Assert::parameterType( 'integer', $visibility, '$visibility' ); + + $this->mDeleted = $visibility; + } + + /** + * @param string $timestamp A timestamp understood by wfTimestamp + */ + public function setTimestamp( $timestamp ) { + Assert::parameterType( 'string', $timestamp, '$timestamp' ); + + $this->mTimestamp = wfTimestamp( TS_MW, $timestamp ); + } + + /** + * @param bool $minorEdit + */ + public function setMinorEdit( $minorEdit ) { + Assert::parameterType( 'boolean', $minorEdit, '$minorEdit' ); + + $this->mMinorEdit = $minorEdit; + } + + /** + * Set the revision ID. + * + * MCR migration note: this replaces Revision::setId() + * + * @warning Use this with care, especially when preparing a revision for insertion + * into the database! The revision ID should only be fixed in special cases + * like preserving the original ID when restoring a revision. 
+ * + * @param int $id + */ + public function setId( $id ) { + Assert::parameterType( 'integer', $id, '$id' ); + + $this->mId = $id; + } + + /** + * Sets the user identity associated with the revision + * + * @param UserIdentity $user + */ + public function setUser( UserIdentity $user ) { + $this->mUser = $user; + } + + /** + * @param int $pageId Page ID to record; must equal the Title's article ID when the Title exists + */ + public function setPageId( $pageId ) { + Assert::parameterType( 'integer', $pageId, '$pageId' ); + + if ( $this->mTitle->exists() && $pageId !== $this->mTitle->getArticleID() ) { + throw new InvalidArgumentException( + 'The given Title does not belong to page ID ' . $pageId + ); + } + + $this->mPageId = $pageId; + } + + /** + * Returns the nominal size of this revision. + * + * MCR migration note: this replaces Revision::getSize + * + * @return int The nominal size, may be computed on the fly if not yet known. + */ + public function getSize() { + // If not known, re-calculate and remember. Will be reset when slots change. + if ( $this->mSize === null ) { + $this->mSize = $this->mSlots->computeSize(); + } + + return $this->mSize; + } + + /** + * Returns the base36 sha1 of this revision. + * + * MCR migration note: this replaces Revision::getSha1 + * + * @return string The revision hash, may be computed on the fly if not yet known. + */ + public function getSha1() { + // If not known, re-calculate and remember. Will be reset when slots change. + if ( $this->mSha1 === null ) { + $this->mSha1 = $this->mSlots->computeSha1(); + } + + return $this->mSha1; + } + + /** + * Invalidate cached aggregate values such as hash and size.
+ */ + private function resetAggregateValues() { + $this->mSize = null; + $this->mSha1 = null; + } + +} diff --git a/www/wiki/includes/Storage/MutableRevisionSlots.php b/www/wiki/includes/Storage/MutableRevisionSlots.php new file mode 100644 index 00000000..2e675c89 --- /dev/null +++ b/www/wiki/includes/Storage/MutableRevisionSlots.php @@ -0,0 +1,137 @@ +<?php +/** + * Mutable version of RevisionSlots, for constructing a new revision. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use Content; + +/** + * Mutable version of RevisionSlots, for constructing a new revision. + * + * @since 1.31 + */ +class MutableRevisionSlots extends RevisionSlots { + + /** + * Constructs a MutableRevisionSlots that inherits from the given + * list of slots. + * + * @param SlotRecord[] $slots + * + * @return MutableRevisionSlots + */ + public static function newFromParentRevisionSlots( array $slots ) { + $inherited = []; + foreach ( $slots as $slot ) { + $role = $slot->getRole(); + $inherited[$role] = SlotRecord::newInherited( $slot ); + } + + return new MutableRevisionSlots( $inherited ); + } + + /** + * @param SlotRecord[] $slots An array of SlotRecords. 
+ */ + public function __construct( array $slots = [] ) { + parent::__construct( $slots ); + } + + /** + * Sets the given slot. + * If a slot with the same role is already present, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param SlotRecord $slot + */ + public function setSlot( SlotRecord $slot ) { + if ( !is_array( $this->slots ) ) { + $this->getSlots(); // initialize $this->slots + } + + $role = $slot->getRole(); + $this->slots[$role] = $slot; + } + + /** + * Sets the content for the slot with the given role. + * If a slot with the same role is already present, it is replaced. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param string $role + * @param Content $content + */ + public function setContent( $role, Content $content ) { + $slot = SlotRecord::newUnsaved( $role, $content ); + $this->setSlot( $slot ); + } + + /** + * Remove the slot for the given role, discontinue the corresponding stream. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @param string $role + */ + public function removeSlot( $role ) { + if ( !is_array( $this->slots ) ) { + $this->getSlots(); // initialize $this->slots + } + + unset( $this->slots[$role] ); + } + + /** + * Return all slots that are not inherited. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. + * + * @return SlotRecord[] + */ + public function getTouchedSlots() { + return array_filter( + $this->getSlots(), + function ( SlotRecord $slot ) { + return !$slot->isInherited(); + } + ); + } + + /** + * Return all slots that are inherited. + * + * @note This may cause the slot meta-data for the revision to be lazy-loaded. 
+ * + * @return SlotRecord[] + */ + public function getInheritedSlots() { + return array_filter( + $this->getSlots(), + function ( SlotRecord $slot ) { + return $slot->isInherited(); + } + ); + } + +} diff --git a/www/wiki/includes/Storage/NameTableAccessException.php b/www/wiki/includes/Storage/NameTableAccessException.php new file mode 100644 index 00000000..393cb1fa --- /dev/null +++ b/www/wiki/includes/Storage/NameTableAccessException.php @@ -0,0 +1,45 @@ +<?php +/** + * Exception representing a failure to look up a row from a name table. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use RuntimeException; + +/** + * Exception representing a failure to look up a row from a name table. 
+ * + * @since 1.31 + */ +class NameTableAccessException extends RuntimeException { + + /** + * @param string $tableName Table the lookup was made against + * @param string $accessType Field the lookup was keyed on, e.g. 'name' or 'id' + * @param string|int $accessValue Value that was looked up + * @return NameTableAccessException + */ + public static function newFromDetails( $tableName, $accessType, $accessValue ) { + $message = "Failed to access name from {$tableName} using {$accessType} = {$accessValue}"; + return new self( $message ); + } + +} diff --git a/www/wiki/includes/Storage/NameTableStore.php b/www/wiki/includes/Storage/NameTableStore.php new file mode 100644 index 00000000..ebce3da9 --- /dev/null +++ b/www/wiki/includes/Storage/NameTableStore.php @@ -0,0 +1,366 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use IExpiringStore; +use Psr\Log\LoggerInterface; +use WANObjectCache; +use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * @author Addshore + * @since 1.31 + */ +class NameTableStore { + + /** @var LoadBalancer */ + private $loadBalancer; + + /** @var WANObjectCache */ + private $cache; + + /** @var LoggerInterface */ + private $logger; + + /** @var string[] */ + private $tableCache = null; + + /** @var bool|string */ + private $wikiId = false; + + /** @var int */ + private $cacheTTL; + + /** @var string */ + private $table; + /** @var string */ + private $idField; + /** @var string */ + private $nameField; + /** @var null|callable */ + private $normalizationCallback = null; + + /** + * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections + * @param WANObjectCache $cache A cache manager for caching data + * @param LoggerInterface $logger + * @param string $table + * @param string $idField + * @param string $nameField + * @param callable $normalizationCallback Normalization to be applied to names before being + * saved or queried. This should be a callback that accepts and returns a single string. + * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki. 
+ */ + public function __construct( + LoadBalancer $dbLoadBalancer, + WANObjectCache $cache, + LoggerInterface $logger, + $table, + $idField, + $nameField, + callable $normalizationCallback = null, + $wikiId = false + ) { + $this->loadBalancer = $dbLoadBalancer; + $this->cache = $cache; + $this->logger = $logger; + $this->table = $table; + $this->idField = $idField; + $this->nameField = $nameField; + $this->normalizationCallback = $normalizationCallback; + $this->wikiId = $wikiId; + $this->cacheTTL = IExpiringStore::TTL_MONTH; + } + + /** + * @param int $index A database index, like DB_MASTER or DB_REPLICA + * @param int $flags Database connection flags + * + * @return IDatabase + */ + private function getDBConnection( $index, $flags = 0 ) { + return $this->loadBalancer->getConnection( $index, [], $this->wikiId, $flags ); + } + + private function getCacheKey() { + return $this->cache->makeKey( 'NameTableSqlStore', $this->table, $this->wikiId ); + } + + /** + * @param string $name + * @return string + */ + private function normalizeName( $name ) { + if ( $this->normalizationCallback === null ) { + return $name; + } + return call_user_func( $this->normalizationCallback, $name ); + } + + /** + * Acquire the id of the given name. + * This creates a row in the table if it doesn't already exist. 
+ * + * @param string $name + * @throws NameTableAccessException + * @return int + */ + public function acquireId( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + $name = $this->normalizeName( $name ); + + $table = $this->getTableFromCachesOrReplica(); + $searchResult = array_search( $name, $table, true ); + if ( $searchResult === false ) { + $id = $this->store( $name ); + if ( $id === null ) { + // RACE: $name was already in the db, probably just inserted, so load from master + // Use DBO_TRX to avoid missing inserts due to other threads or REPEATABLE-READs + $table = $this->loadTable( + $this->getDBConnection( DB_MASTER, LoadBalancer::CONN_TRX_AUTOCOMMIT ) + ); + $searchResult = array_search( $name, $table, true ); + if ( $searchResult === false ) { + // Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data + $m = "No insert possible but master didn't give us a record for " . + "'{$name}' in '{$this->table}'"; + $this->logger->error( $m ); + throw new NameTableAccessException( $m ); + } + $this->purgeWANCache( + function () { + $this->cache->reap( $this->getCacheKey(), INF ); + } + ); + } else { + $table[$id] = $name; + $searchResult = $id; + // As store returned an ID we know we inserted so delete from WAN cache + $this->purgeWANCache( + function () { + $this->cache->delete( $this->getCacheKey() ); + } + ); + } + $this->tableCache = $table; + } + + return $searchResult; + } + + /** + * Get the id of the given name. + * If the name doesn't exist this will throw. + * This should be used in cases where we believe the name already exists or want to check for + * existence. 
+ * + * @param string $name + * @throws NameTableAccessException The name does not exist + * @return int Id + */ + public function getId( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + $name = $this->normalizeName( $name ); + + $table = $this->getTableFromCachesOrReplica(); + $searchResult = array_search( $name, $table, true ); + + if ( $searchResult !== false ) { + return $searchResult; + } + + throw NameTableAccessException::newFromDetails( $this->table, 'name', $name ); + } + + /** + * Get the name of the given id. + * If the id doesn't exist this will throw. + * This should be used in cases where we believe the id already exists. + * + * Note: Calls to this method will result in a master select for non existing IDs. + * + * @param int $id + * @throws NameTableAccessException The id does not exist + * @return string name + */ + public function getName( $id ) { + Assert::parameterType( 'integer', $id, '$id' ); + + $table = $this->getTableFromCachesOrReplica(); + if ( array_key_exists( $id, $table ) ) { + return $table[$id]; + } + + $table = $this->cache->getWithSetCallback( + $this->getCacheKey(), + $this->cacheTTL, + function ( $oldValue, &$ttl, &$setOpts ) use ( $id ) { + // Check if cached value is up-to-date enough to have $id + if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) { + // Completely leave the cache key alone + $ttl = WANObjectCache::TTL_UNCACHEABLE; + // Use the old value + return $oldValue; + } + // Regenerate from replica DB, and master DB if needed + foreach ( [ DB_REPLICA, DB_MASTER ] as $source ) { + // Log a fallback to master + if ( $source === DB_MASTER ) { + $this->logger->info( + __METHOD__ . ' falling back to master select from ' . + $this->table . ' with id ' .
$id + ); + } + $db = $this->getDBConnection( $source ); + $cacheSetOpts = Database::getCacheSetOptions( $db ); + $table = $this->loadTable( $db ); + if ( array_key_exists( $id, $table ) ) { + break; // found it + } + } + // Use the value from last source checked + $setOpts += $cacheSetOpts; + + return $table; + }, + [ 'minAsOf' => INF ] // force callback run + ); + + $this->tableCache = $table; + + if ( array_key_exists( $id, $table ) ) { + return $table[$id]; + } + + throw NameTableAccessException::newFromDetails( $this->table, 'id', $id ); + } + + /** + * Get the whole table, in no particular order as a map of ids to names. + * This method could be subject to DB or cache lag. + * + * @return string[] keys are the name ids, values are the names themselves + * Example: [ 1 => 'foo', 3 => 'bar' ] + */ + public function getMap() { + return $this->getTableFromCachesOrReplica(); + } + + /** + * @return string[] + */ + private function getTableFromCachesOrReplica() { + if ( $this->tableCache !== null ) { + return $this->tableCache; + } + + $table = $this->cache->getWithSetCallback( + $this->getCacheKey(), + $this->cacheTTL, + function ( $oldValue, &$ttl, &$setOpts ) { + $dbr = $this->getDBConnection( DB_REPLICA ); + $setOpts += Database::getCacheSetOptions( $dbr ); + return $this->loadTable( $dbr ); + } + ); + + $this->tableCache = $table; + + return $table; + } + + /** + * Reap the WANCache entry for this table. 
+ * + * @param callable $purgeCallback callback to 'purge' the WAN cache + */ + private function purgeWANCache( $purgeCallback ) { + // If the LB has no DB changes don't both with onTransactionPreCommitOrIdle + if ( !$this->loadBalancer->hasOrMadeRecentMasterChanges() ) { + $purgeCallback(); + return; + } + + $this->getDBConnection( DB_MASTER ) + ->onTransactionPreCommitOrIdle( $purgeCallback, __METHOD__ ); + } + + /** + * Gets the table from the db + * + * @param IDatabase $db + * + * @return string[] + */ + private function loadTable( IDatabase $db ) { + $result = $db->select( + $this->table, + [ + 'id' => $this->idField, + 'name' => $this->nameField + ], + [], + __METHOD__, + [ 'ORDER BY' => 'id' ] + ); + + $assocArray = []; + foreach ( $result as $row ) { + $assocArray[$row->id] = $row->name; + } + + return $assocArray; + } + + /** + * Stores the given name in the DB, returning the ID when an insert occurs. + * + * @param string $name + * @return int|null int if we know the ID, null if we don't + */ + private function store( $name ) { + Assert::parameterType( 'string', $name, '$name' ); + Assert::parameter( $name !== '', '$name', 'should not be an empty string' ); + // Note: this is only called internally so normalization of $name has already occurred. + + $dbw = $this->getDBConnection( DB_MASTER ); + + $dbw->insert( + $this->table, + [ $this->nameField => $name ], + __METHOD__, + [ 'IGNORE' ] + ); + + if ( $dbw->affectedRows() === 0 ) { + $this->logger->info( + 'Tried to insert name into table ' . $this->table . ', but value already existed.' + ); + return null; + } + + return $dbw->insertId(); + } + +} diff --git a/www/wiki/includes/Storage/RevisionAccessException.php b/www/wiki/includes/Storage/RevisionAccessException.php new file mode 100644 index 00000000..ee6efc0a --- /dev/null +++ b/www/wiki/includes/Storage/RevisionAccessException.php @@ -0,0 +1,34 @@ +<?php +/** + * Exception representing a failure to look up a revision. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use RuntimeException; + +/** + * Exception representing a failure to look up a revision. + * + * @since 1.31 + */ +class RevisionAccessException extends RuntimeException { + +} diff --git a/www/wiki/includes/Storage/RevisionArchiveRecord.php b/www/wiki/includes/Storage/RevisionArchiveRecord.php new file mode 100644 index 00000000..213ee3cd --- /dev/null +++ b/www/wiki/includes/Storage/RevisionArchiveRecord.php @@ -0,0 +1,170 @@ +<?php +/** + * A RevisionRecord representing a revision of a deleted page persisted in the archive table. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use CommentStoreComment; +use MediaWiki\User\UserIdentity; +use Title; +use User; +use Wikimedia\Assert\Assert; + +/** + * A RevisionRecord representing a revision of a deleted page persisted in the archive table. + * Most getters on RevisionArchiveRecord will never return null. However, getId() and + * getParentId() may indeed return null if this information was not stored when the archive entry + * was created. + * + * @since 1.31 + */ +class RevisionArchiveRecord extends RevisionRecord { + + /** + * @var int + */ + protected $mArchiveId; + + /** + * @note Avoid calling this constructor directly. Use the appropriate methods + * in RevisionStore instead. + * + * @param Title $title The title of the page this Revision is associated with. + * @param UserIdentity $user + * @param CommentStoreComment $comment + * @param object $row An archive table row. Use RevisionStore::getArchiveQueryInfo() to build + * a query that yields the required fields. + * @param RevisionSlots $slots The slots of this revision. + * @param bool|string $wikiId the wiki ID of the site this Revision belongs to, + * or false for the local site. 
+ */ + function __construct( + Title $title, + UserIdentity $user, + CommentStoreComment $comment, + $row, + RevisionSlots $slots, + $wikiId = false + ) { + parent::__construct( $title, $slots, $wikiId ); + Assert::parameterType( 'object', $row, '$row' ); + + $timestamp = wfTimestamp( TS_MW, $row->ar_timestamp ); + Assert::parameter( is_string( $timestamp ), '$row->rev_timestamp', 'must be a valid timestamp' ); + + $this->mArchiveId = intval( $row->ar_id ); + + // NOTE: ar_page_id may be different from $this->mTitle->getArticleID() in some cases, + // notably when a partially restored page has been moved, and a new page has been created + // with the same title. Archive rows for that title will then have the wrong page id. + $this->mPageId = isset( $row->ar_page_id ) ? intval( $row->ar_page_id ) : $title->getArticleID(); + + // NOTE: ar_parent_id = 0 indicates that there is no parent revision, while null + // indicates that the parent revision is unknown. As per MW 1.31, the database schema + // allows ar_parent_id to be NULL. + $this->mParentId = isset( $row->ar_parent_id ) ? intval( $row->ar_parent_id ) : null; + $this->mId = isset( $row->ar_rev_id ) ? intval( $row->ar_rev_id ) : null; + $this->mComment = $comment; + $this->mUser = $user; + $this->mTimestamp = $timestamp; + $this->mMinorEdit = boolval( $row->ar_minor_edit ); + $this->mDeleted = intval( $row->ar_deleted ); + $this->mSize = isset( $row->ar_len ) ? intval( $row->ar_len ) : null; + $this->mSha1 = !empty( $row->ar_sha1 ) ? $row->ar_sha1 : null; + } + + /** + * Get archive row ID + * + * @return int + */ + public function getArchiveId() { + return $this->mArchiveId; + } + + /** + * @return int|null The revision id, or null if the original revision ID + * was not recorded in the archive table. + */ + public function getId() { + // overwritten just to refine the contract specification. 
+ return parent::getId(); + } + + /** + * @throws RevisionAccessException if the size was unknown and could not be calculated. + * @return int The nominal revision size, never null. May be computed on the fly. + */ + public function getSize() { + // If length is null, calculate and remember it (potentially SLOW!). + // This is for compatibility with old database rows that don't have the field set. + if ( $this->mSize === null ) { + $this->mSize = $this->mSlots->computeSize(); + } + + return $this->mSize; + } + + /** + * @throws RevisionAccessException if the hash was unknown and could not be calculated. + * @return string The revision hash, never null. May be computed on the fly. + */ + public function getSha1() { + // If hash is null, calculate it and remember (potentially SLOW!) + // This is for compatibility with old database rows that don't have the field set. + if ( $this->mSha1 === null ) { + $this->mSha1 = $this->mSlots->computeSha1(); + } + + return $this->mSha1; + } + + /** + * @param int $audience + * @param User|null $user + * + * @return UserIdentity The identity of the revision author, null if access is forbidden. + */ + public function getUser( $audience = self::FOR_PUBLIC, User $user = null ) { + // overwritten just to add a guarantee to the contract + return parent::getUser( $audience, $user ); + } + + /** + * @param int $audience + * @param User|null $user + * + * @return CommentStoreComment The revision comment, null if access is forbidden. 
+ */ + public function getComment( $audience = self::FOR_PUBLIC, User $user = null ) { + // overwritten just to add a guarantee to the contract + return parent::getComment( $audience, $user ); + } + + /** + * @return string never null + */ + public function getTimestamp() { + // overwritten just to add a guarantee to the contract + return parent::getTimestamp(); + } + +} diff --git a/www/wiki/includes/Storage/RevisionFactory.php b/www/wiki/includes/Storage/RevisionFactory.php new file mode 100644 index 00000000..86e8c06f --- /dev/null +++ b/www/wiki/includes/Storage/RevisionFactory.php @@ -0,0 +1,94 @@ +<?php +/** + * Service for constructing revision objects. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use MWException; +use Title; + +/** + * Service for constructing revision objects. + * + * @since 1.31 + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. + */ +interface RevisionFactory { + + /** + * Constructs a new RevisionRecord based on the given associative array following the MW1.29 + * database convention for the Revision constructor. 
+ * + * MCR migration note: this replaces Revision::newFromRow + * + * @deprecated since 1.31. Use a MutableRevisionRecord instead. + * + * @param array $fields + * @param int $queryFlags Flags for lazy loading behavior, see IDBAccessObject::READ_XXX. + * @param Title|null $title + * + * @return MutableRevisionRecord + * @throws MWException + */ + public function newMutableRevisionFromArray( array $fields, $queryFlags = 0, Title $title = null ); + + /** + * Constructs a RevisionRecord given a database row and content slots. + * + * MCR migration note: this replaces Revision::newFromRow for rows based on the + * revision, slot, and content tables defined for MCR since MW1.31. + * + * @param object $row A query result row as a raw object. + * Use RevisionStore::getQueryInfo() to build a query that yields the required fields. + * @param int $queryFlags Flags for lazy loading behavior, see IDBAccessObject::READ_XXX. + * @param Title|null $title + * + * @return RevisionRecord + */ + public function newRevisionFromRow( $row, $queryFlags = 0, Title $title = null ); + + /** + * Make a fake revision object from an archive table row. This is queried + * for permissions or even inserted (as in Special:Undelete) + * + * MCR migration note: this replaces Revision::newFromArchiveRow + * + * @param object $row A query result row as a raw object. + * Use RevisionStore::getArchiveQueryInfo() to build a query that yields the + * required fields. + * @param int $queryFlags Flags for lazy loading behavior, see IDBAccessObject::READ_XXX. + * @param Title $title + * @param array $overrides An associative array that allows fields in $row to be overwritten. + * Keys in this array correspond to field names in $row without the "ar_" prefix, so + * $overrides['user'] will override $row->ar_user, etc. 
+ * + * @return RevisionRecord + */ + public function newRevisionFromArchiveRow( + $row, + $queryFlags = 0, + Title $title = null, + array $overrides = [] + ); + +} diff --git a/www/wiki/includes/Storage/RevisionLookup.php b/www/wiki/includes/Storage/RevisionLookup.php new file mode 100644 index 00000000..45cd1841 --- /dev/null +++ b/www/wiki/includes/Storage/RevisionLookup.php @@ -0,0 +1,120 @@ +<?php +/** + * Service for looking up page revisions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use IDBAccessObject; +use MediaWiki\Linker\LinkTarget; +use Title; + +/** + * Service for looking up page revisions. + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. + * + * @since 1.31 + */ +interface RevisionLookup extends IDBAccessObject { + + /** + * Load a page revision from a given revision ID number. + * Returns null if no such revision can be found. 
+ * + * MCR migration note: this replaces Revision::newFromId + * + * $flags include: + * + * @param int $id + * @param int $flags bit field, see IDBAccessObject::READ_XXX + * @return RevisionRecord|null + */ + public function getRevisionById( $id, $flags = 0 ); + + /** + * Load either the current, or a specified, revision + * that's attached to a given link target. If not attached + * to that link target, will return null. + * + * MCR migration note: this replaces Revision::newFromTitle + * + * @param LinkTarget $linkTarget + * @param int $revId (optional) + * @param int $flags bit field, see IDBAccessObject::READ_XXX + * @return RevisionRecord|null + */ + public function getRevisionByTitle( LinkTarget $linkTarget, $revId = 0, $flags = 0 ); + + /** + * Load either the current, or a specified, revision + * that's attached to a given page ID. + * Returns null if no such revision can be found. + * + * MCR migration note: this replaces Revision::newFromPageId + * + * @param int $pageId + * @param int $revId (optional) + * @param int $flags bit field, see IDBAccessObject::READ_XXX + * @return RevisionRecord|null + */ + public function getRevisionByPageId( $pageId, $revId = 0, $flags = 0 ); + + /** + * Get previous revision for this title + * + * MCR migration note: this replaces Revision::getPrevious + * + * @param RevisionRecord $rev + * @param Title $title if known (optional) + * + * @return RevisionRecord|null + */ + public function getPreviousRevision( RevisionRecord $rev, Title $title = null ); + + /** + * Get next revision for this title + * + * MCR migration note: this replaces Revision::getNext + * + * @param RevisionRecord $rev + * @param Title $title if known (optional) + * + * @return RevisionRecord|null + */ + public function getNextRevision( RevisionRecord $rev, Title $title = null ); + + /** + * Load a revision based on a known page ID and current revision ID from the DB + * + * This method allows for the use of caching, though accessing anything that 
normally + * requires permission checks (aside from the text) will trigger a small DB lookup. + * + * MCR migration note: this replaces Revision::newKnownCurrent + * + * @param Title $title the associated page title + * @param int $revId current revision of this page + * + * @return RevisionRecord|bool Returns false if missing + */ + public function getKnownCurrentRevision( Title $title, $revId ); + +} diff --git a/www/wiki/includes/Storage/RevisionRecord.php b/www/wiki/includes/Storage/RevisionRecord.php new file mode 100644 index 00000000..6d83e1c1 --- /dev/null +++ b/www/wiki/includes/Storage/RevisionRecord.php @@ -0,0 +1,492 @@ +<?php +/** + * Page revision base class. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use CommentStoreComment; +use Content; +use InvalidArgumentException; +use LogicException; +use MediaWiki\Linker\LinkTarget; +use MediaWiki\User\UserIdentity; +use MWException; +use Title; +use User; +use Wikimedia\Assert\Assert; + +/** + * Page revision base class. + * + * RevisionRecords are considered value objects, but they may use callbacks for lazy loading. + * Note that while the base class has no setters, subclasses may offer a mutable interface. 
+ * + * @since 1.31 + */ +abstract class RevisionRecord { + + // RevisionRecord deletion constants + const DELETED_TEXT = 1; + const DELETED_COMMENT = 2; + const DELETED_USER = 4; + const DELETED_RESTRICTED = 8; + const SUPPRESSED_USER = 12; // convenience + const SUPPRESSED_ALL = 15; // convenience + + // Audience options for accessors + const FOR_PUBLIC = 1; + const FOR_THIS_USER = 2; + const RAW = 3; + + /** @var string Wiki ID; false means the current wiki */ + protected $mWiki = false; + /** @var int|null */ + protected $mId; + /** @var int|null */ + protected $mPageId; + /** @var UserIdentity|null */ + protected $mUser; + /** @var bool */ + protected $mMinorEdit = false; + /** @var string|null */ + protected $mTimestamp; + /** @var int using the DELETED_XXX and SUPPRESSED_XXX flags */ + protected $mDeleted = 0; + /** @var int|null */ + protected $mSize; + /** @var string|null */ + protected $mSha1; + /** @var int|null */ + protected $mParentId; + /** @var CommentStoreComment|null */ + protected $mComment; + + /** @var Title */ + protected $mTitle; // TODO: we only need the title for permission checks! + + /** @var RevisionSlots */ + protected $mSlots; + + /** + * @note Avoid calling this constructor directly. Use the appropriate methods + * in RevisionStore instead. + * + * @param Title $title The title of the page this Revision is associated with. + * @param RevisionSlots $slots The slots of this revision. + * @param bool|string $wikiId the wiki ID of the site this Revision belongs to, + * or false for the local site. + * + * @throws MWException + */ + function __construct( Title $title, RevisionSlots $slots, $wikiId = false ) { + Assert::parameterType( 'string|boolean', $wikiId, '$wikiId' ); + + $this->mTitle = $title; + $this->mSlots = $slots; + $this->mWiki = $wikiId; + + // XXX: this is a sensible default, but we may not have a Title object here in the future. + $this->mPageId = $title->getArticleID(); + } + + /** + * Implemented to defy serialization. 
+ * + * @throws LogicException always + */ + public function __sleep() { + throw new LogicException( __CLASS__ . ' is not serializable.' ); + } + + /** + * @param RevisionRecord $rec + * + * @return bool True if this RevisionRecord is known to have same content as $rec. + * False if the content is different (or not known to be the same). + */ + public function hasSameContent( RevisionRecord $rec ) { + if ( $rec === $this ) { + return true; + } + + if ( $this->getId() !== null && $this->getId() === $rec->getId() ) { + return true; + } + + // check size before hash, since size is quicker to compute + if ( $this->getSize() !== $rec->getSize() ) { + return false; + } + + // instead of checking the hash, we could also check the content addresses of all slots. + + if ( $this->getSha1() === $rec->getSha1() ) { + return true; + } + + return false; + } + + /** + * Returns the Content of the given slot of this revision. + * Call getSlotNames() to get a list of available slots. + * + * Note that for mutable Content objects, each call to this method will return a + * fresh clone. + * + * MCR migration note: this replaces Revision::getContent + * + * @param string $role The role name of the desired slot + * @param int $audience + * @param User|null $user + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return Content|null The content of the given slot, or null if access is forbidden. + */ + public function getContent( $role, $audience = self::FOR_PUBLIC, User $user = null ) { + // XXX: throwing an exception would be nicer, but would a further + // departure from the signature of Revision::getContent(), and thus + // more complex and error prone refactoring. + if ( !$this->audienceCan( self::DELETED_TEXT, $audience, $user ) ) { + return null; + } + + $content = $this->getSlot( $role, $audience, $user )->getContent(); + return $content->copy(); + } + + /** + * Returns meta-data for the given slot. 
+ * + * @param string $role The role name of the desired slot + * @param int $audience + * @param User|null $user + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return SlotRecord The slot meta-data. If access to the slot content is forbidden, + * calling getContent() on the SlotRecord will throw an exception. + */ + public function getSlot( $role, $audience = self::FOR_PUBLIC, User $user = null ) { + $slot = $this->mSlots->getSlot( $role ); + + if ( !$this->audienceCan( self::DELETED_TEXT, $audience, $user ) ) { + return SlotRecord::newWithSuppressedContent( $slot ); + } + + return $slot; + } + + /** + * Returns whether the given slot is defined in this revision. + * + * @param string $role The role name of the desired slot + * + * @return bool + */ + public function hasSlot( $role ) { + return $this->mSlots->hasSlot( $role ); + } + + /** + * Returns the slot names (roles) of all slots present in this revision. + * getContent() will succeed only for the names returned by this method. + * + * @return string[] + */ + public function getSlotRoles() { + return $this->mSlots->getSlotRoles(); + } + + /** + * Get revision ID. Depending on the concrete subclass, this may return null if + * the revision ID is not known (e.g. because the revision does not yet exist + * in the database). + * + * MCR migration note: this replaces Revision::getId + * + * @return int|null + */ + public function getId() { + return $this->mId; + } + + /** + * Get parent revision ID (the original previous page revision). + * If there is no parent revision, this returns 0. + * If the parent revision is undefined or unknown, this returns null. + * + * @note As of MW 1.31, the database schema allows the parent ID to be + * NULL to indicate that it is unknown. 
+ * + * MCR migration note: this replaces Revision::getParentId + * + * @return int|null + */ + public function getParentId() { + return $this->mParentId; + } + + /** + * Returns the nominal size of this revision, in bogo-bytes. + * May be calculated on the fly if not known, which may in the worst + * case may involve loading all content. + * + * MCR migration note: this replaces Revision::getSize + * + * @throws RevisionAccessException if the size was unknown and could not be calculated. + * @return int + */ + abstract public function getSize(); + + /** + * Returns the base36 sha1 of this revision. This hash is derived from the + * hashes of all slots associated with the revision. + * May be calculated on the fly if not known, which may in the worst + * case may involve loading all content. + * + * MCR migration note: this replaces Revision::getSha1 + * + * @throws RevisionAccessException if the hash was unknown and could not be calculated. + * @return string + */ + abstract public function getSha1(); + + /** + * Get the page ID. If the page does not yet exist, the page ID is 0. + * + * MCR migration note: this replaces Revision::getPage + * + * @return int + */ + public function getPageId() { + return $this->mPageId; + } + + /** + * Get the ID of the wiki this revision belongs to. + * + * @return string|false The wiki's logical name, of false to indicate the local wiki. + */ + public function getWikiId() { + return $this->mWiki; + } + + /** + * Returns the title of the page this revision is associated with as a LinkTarget object. + * + * MCR migration note: this replaces Revision::getTitle + * + * @return LinkTarget + */ + public function getPageAsLinkTarget() { + return $this->mTitle; + } + + /** + * Fetch revision's author's user identity, if it's available to the specified audience. + * If the specified audience does not have access to it, null will be + * returned. 
Depending on the concrete subclass, null may also be returned if the user is + * not yet specified. + * + * MCR migration note: this replaces Revision::getUser + * + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC to be displayed to all users + * RevisionRecord::FOR_THIS_USER to be displayed to the given user + * RevisionRecord::RAW get the ID regardless of permissions + * @param User|null $user User object to check for, only if FOR_THIS_USER is passed + * to the $audience parameter + * @return UserIdentity|null + */ + public function getUser( $audience = self::FOR_PUBLIC, User $user = null ) { + if ( !$this->audienceCan( self::DELETED_USER, $audience, $user ) ) { + return null; + } else { + return $this->mUser; + } + } + + /** + * Fetch revision comment, if it's available to the specified audience. + * If the specified audience does not have access to the comment, + * this will return null. Depending on the concrete subclass, null may also be returned + * if the comment is not yet specified. 
+ * + * MCR migration note: this replaces Revision::getComment + * + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC to be displayed to all users + * RevisionRecord::FOR_THIS_USER to be displayed to the given user + * RevisionRecord::RAW get the text regardless of permissions + * @param User|null $user User object to check for, only if FOR_THIS_USER is passed + * to the $audience parameter + * + * @return CommentStoreComment|null + */ + public function getComment( $audience = self::FOR_PUBLIC, User $user = null ) { + if ( !$this->audienceCan( self::DELETED_COMMENT, $audience, $user ) ) { + return null; + } else { + return $this->mComment; + } + } + + /** + * MCR migration note: this replaces Revision::isMinor + * + * @return bool + */ + public function isMinor() { + return (bool)$this->mMinorEdit; + } + + /** + * MCR migration note: this replaces Revision::isDeleted + * + * @param int $field One of DELETED_* bitfield constants + * + * @return bool + */ + public function isDeleted( $field ) { + return ( $this->getVisibility() & $field ) == $field; + } + + /** + * Get the deletion bitfield of the revision + * + * MCR migration note: this replaces Revision::getVisibility + * + * @return int + */ + public function getVisibility() { + return (int)$this->mDeleted; + } + + /** + * MCR migration note: this replaces Revision::getTimestamp. + * + * May return null if the timestamp was not specified. + * + * @return string|null + */ + public function getTimestamp() { + return $this->mTimestamp; + } + + /** + * Check that the given audience has access to the given field. 
+ * + * MCR migration note: this corresponds to Revision::userCan + * + * @param int $field One of self::DELETED_TEXT, + * self::DELETED_COMMENT, + * self::DELETED_USER + * @param int $audience One of: + * RevisionRecord::FOR_PUBLIC to be displayed to all users + * RevisionRecord::FOR_THIS_USER to be displayed to the given user + * RevisionRecord::RAW get the text regardless of permissions + * @param User|null $user User object to check. Required if $audience is FOR_THIS_USER, + * ignored otherwise. + * + * @return bool + */ + protected function audienceCan( $field, $audience, User $user = null ) { + if ( $audience == self::FOR_PUBLIC && $this->isDeleted( $field ) ) { + return false; + } elseif ( $audience == self::FOR_THIS_USER ) { + if ( !$user ) { + throw new InvalidArgumentException( + 'A User object must be given when checking FOR_THIS_USER audience.' + ); + } + + if ( !$this->userCan( $field, $user ) ) { + return false; + } + } + + return true; + } + + /** + * Determine if the current user is allowed to view a particular + * field of this revision, if it's marked as deleted. + * + * MCR migration note: this corresponds to Revision::userCan + * + * @param int $field One of self::DELETED_TEXT, + * self::DELETED_COMMENT, + * self::DELETED_USER + * @param User $user User object to check + * @return bool + */ + protected function userCan( $field, User $user ) { + // TODO: use callback for permission checks, so we don't need to know a Title object! + return self::userCanBitfield( $this->getVisibility(), $field, $user, $this->mTitle ); + } + + /** + * Determine if the current user is allowed to view a particular + * field of this revision, if it's marked as deleted. This is used + * by various classes to avoid duplication. 
+ * + * MCR migration note: this replaces Revision::userCanBitfield + * + * @param int $bitfield Current field + * @param int $field One of self::DELETED_TEXT = File::DELETED_FILE, + * self::DELETED_COMMENT = File::DELETED_COMMENT, + * self::DELETED_USER = File::DELETED_USER + * @param User $user User object to check + * @param Title|null $title A Title object to check for per-page restrictions on, + * instead of just plain userrights + * @return bool + */ + public static function userCanBitfield( $bitfield, $field, User $user, Title $title = null ) { + if ( $bitfield & $field ) { // aspect is deleted + if ( $bitfield & self::DELETED_RESTRICTED ) { + $permissions = [ 'suppressrevision', 'viewsuppressed' ]; + } elseif ( $field & self::DELETED_TEXT ) { + $permissions = [ 'deletedtext' ]; + } else { + $permissions = [ 'deletedhistory' ]; + } + $permissionlist = implode( ', ', $permissions ); + if ( $title === null ) { + wfDebug( "Checking for $permissionlist due to $field match on $bitfield\n" ); + return call_user_func_array( [ $user, 'isAllowedAny' ], $permissions ); + } else { + $text = $title->getPrefixedText(); + wfDebug( "Checking for $permissionlist on $text due to $field match on $bitfield\n" ); + foreach ( $permissions as $perm ) { + if ( $title->userCan( $perm, $user ) ) { + return true; + } + } + return false; + } + } else { + return true; + } + } + +} diff --git a/www/wiki/includes/Storage/RevisionSlots.php b/www/wiki/includes/Storage/RevisionSlots.php new file mode 100644 index 00000000..7fa5431d --- /dev/null +++ b/www/wiki/includes/Storage/RevisionSlots.php @@ -0,0 +1,202 @@ +<?php +/** + * Value object representing the set of slots belonging to a revision. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use Content; +use LogicException; +use Wikimedia\Assert\Assert; + +/** + * Value object representing the set of slots belonging to a revision. + * + * @since 1.31 + */ +class RevisionSlots { + + /** @var SlotRecord[]|callable */ + protected $slots; + + /** + * @param SlotRecord[]|callable $slots SlotRecords, + * or a callback that returns such a structure. + */ + public function __construct( $slots ) { + Assert::parameterType( 'array|callable', $slots, '$slots' ); + + if ( is_callable( $slots ) ) { + $this->slots = $slots; + } else { + $this->setSlotsInternal( $slots ); + } + } + + /** + * @param SlotRecord[] $slots + */ + private function setSlotsInternal( array $slots ) { + $this->slots = []; + + // re-key the slot array + foreach ( $slots as $slot ) { + $role = $slot->getRole(); + $this->slots[$role] = $slot; + } + } + + /** + * Implemented to defy serialization. + * + * @throws LogicException always + */ + public function __sleep() { + throw new LogicException( __CLASS__ . ' is not serializable.' ); + } + + /** + * Returns the Content of the given slot. + * Call getSlotNames() to get a list of available slots. + * + * Note that for mutable Content objects, each call to this method will return a + * fresh clone. + * + * @param string $role The role name of the desired slot + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. 
+ * @return Content + */ + public function getContent( $role ) { + // Return a copy to be safe. Immutable content objects return $this from copy(). + return $this->getSlot( $role )->getContent()->copy(); + } + + /** + * Returns the SlotRecord of the given slot. + * Call getSlotNames() to get a list of available slots. + * + * @param string $role The role name of the desired slot + * + * @throws RevisionAccessException if the slot does not exist or slot data + * could not be lazy-loaded. + * @return SlotRecord + */ + public function getSlot( $role ) { + $slots = $this->getSlots(); + + if ( isset( $slots[$role] ) ) { + return $slots[$role]; + } else { + throw new RevisionAccessException( 'No such slot: ' . $role ); + } + } + + /** + * Returns whether the given slot is set. + * + * @param string $role The role name of the desired slot + * + * @return bool + */ + public function hasSlot( $role ) { + $slots = $this->getSlots(); + + return isset( $slots[$role] ); + } + + /** + * Returns the slot names (roles) of all slots present in this revision. + * getContent() will succeed only for the names returned by this method. + * + * @return string[] + */ + public function getSlotRoles() { + $slots = $this->getSlots(); + return array_keys( $slots ); + } + + /** + * Computes the total nominal size of the revision's slots, in bogo-bytes. + * + * @warn This is potentially expensive! It may cause all slot's content to be loaded + * and deserialized. + * + * @return int + */ + public function computeSize() { + return array_reduce( $this->getSlots(), function ( $accu, SlotRecord $slot ) { + return $accu + $slot->getSize(); + }, 0 ); + } + + /** + * Returns an associative array that maps role names to SlotRecords. Each SlotRecord + * represents the content meta-data of a slot, together they define the content of + * a revision. + * + * @note This may cause the content meta-data for the revision to be lazy-loaded. 
+ * + * @return SlotRecord[] revision slot/content rows, keyed by slot role name. + */ + public function getSlots() { + if ( is_callable( $this->slots ) ) { + $slots = call_user_func( $this->slots ); + + Assert::postcondition( + is_array( $slots ), + 'Slots info callback should return an array of objects' + ); + + $this->setSlotsInternal( $slots ); + } + + return $this->slots; + } + + /** + * Computes the combined hash of the revisions's slots. + * + * @note For backwards compatibility, the combined hash of a single slot + * is that slot's hash. For consistency, the combined hash of an empty set of slots + * is the hash of the empty string. + * + * @warn This is potentially expensive! It may cause all slot's content to be loaded + * and deserialized, then re-serialized and hashed. + * + * @return string + */ + public function computeSha1() { + $slots = $this->getSlots(); + ksort( $slots ); + + if ( empty( $slots ) ) { + return SlotRecord::base36Sha1( '' ); + } + + return array_reduce( $slots, function ( $accu, SlotRecord $slot ) { + return $accu === null + ? $slot->getSha1() + : SlotRecord::base36Sha1( $accu . $slot->getSha1() ); + }, null ); + } + +} diff --git a/www/wiki/includes/Storage/RevisionStore.php b/www/wiki/includes/Storage/RevisionStore.php new file mode 100644 index 00000000..13aedbab --- /dev/null +++ b/www/wiki/includes/Storage/RevisionStore.php @@ -0,0 +1,2017 @@ +<?php +/** + * Service for looking up page revisions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * Attribution notice: when this file was created, much of its content was taken + * from the Revision.php file as present in release 1.30. Refer to the history + * of that file for original authorship. + * + * @file + */ + +namespace MediaWiki\Storage; + +use ActorMigration; +use CommentStore; +use CommentStoreComment; +use Content; +use ContentHandler; +use DBAccessObjectUtils; +use Hooks; +use IDBAccessObject; +use InvalidArgumentException; +use IP; +use LogicException; +use MediaWiki\Linker\LinkTarget; +use MediaWiki\User\UserIdentity; +use MediaWiki\User\UserIdentityValue; +use Message; +use MWException; +use MWUnknownContentModelException; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Psr\Log\NullLogger; +use RecentChange; +use stdClass; +use Title; +use User; +use WANObjectCache; +use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\DBConnRef; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * Service for looking up page revisions. + * + * @since 1.31 + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. 
+ */ +class RevisionStore + implements IDBAccessObject, RevisionFactory, RevisionLookup, LoggerAwareInterface { + + /** + * @var SqlBlobStore + */ + private $blobStore; + + /** + * @var bool|string + */ + private $wikiId; + + /** + * @var boolean + */ + private $contentHandlerUseDB = true; + + /** + * @var LoadBalancer + */ + private $loadBalancer; + + /** + * @var WANObjectCache + */ + private $cache; + + /** + * @var CommentStore + */ + private $commentStore; + + /** + * @var ActorMigration + */ + private $actorMigration; + + /** + * @var LoggerInterface + */ + private $logger; + + /** + * @todo $blobStore should be allowed to be any BlobStore! + * + * @param LoadBalancer $loadBalancer + * @param SqlBlobStore $blobStore + * @param WANObjectCache $cache + * @param CommentStore $commentStore + * @param ActorMigration $actorMigration + * @param bool|string $wikiId + */ + public function __construct( + LoadBalancer $loadBalancer, + SqlBlobStore $blobStore, + WANObjectCache $cache, + CommentStore $commentStore, + ActorMigration $actorMigration, + $wikiId = false + ) { + Assert::parameterType( 'string|boolean', $wikiId, '$wikiId' ); + + $this->loadBalancer = $loadBalancer; + $this->blobStore = $blobStore; + $this->cache = $cache; + $this->commentStore = $commentStore; + $this->actorMigration = $actorMigration; + $this->wikiId = $wikiId; + $this->logger = new NullLogger(); + } + + public function setLogger( LoggerInterface $logger ) { + $this->logger = $logger; + } + + /** + * @return bool Whether the store is read-only + */ + public function isReadOnly() { + return $this->blobStore->isReadOnly(); + } + + /** + * @return bool + */ + public function getContentHandlerUseDB() { + return $this->contentHandlerUseDB; + } + + /** + * @param bool $contentHandlerUseDB + */ + public function setContentHandlerUseDB( $contentHandlerUseDB ) { + $this->contentHandlerUseDB = $contentHandlerUseDB; + } + + /** + * @return LoadBalancer + */ + private function getDBLoadBalancer() { + 
return $this->loadBalancer; + } + + /** + * @param int $mode DB_MASTER or DB_REPLICA + * + * @return IDatabase + */ + private function getDBConnection( $mode ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnection( $mode, [], $this->wikiId ); + } + + /** + * @param IDatabase $connection + */ + private function releaseDBConnection( IDatabase $connection ) { + $lb = $this->getDBLoadBalancer(); + $lb->reuseConnection( $connection ); + } + + /** + * @param int $mode DB_MASTER or DB_REPLICA + * + * @return DBConnRef + */ + private function getDBConnectionRef( $mode ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnectionRef( $mode, [], $this->wikiId ); + } + + /** + * Determines the page Title based on the available information. + * + * MCR migration note: this corresponds to Revision::getTitle + * + * @note this method should be private, external use should be avoided! + * + * @param int|null $pageId + * @param int|null $revId + * @param int $queryFlags + * + * @return Title + * @throws RevisionAccessException + */ + public function getTitle( $pageId, $revId, $queryFlags = self::READ_NORMAL ) { + if ( !$pageId && !$revId ) { + throw new InvalidArgumentException( '$pageId and $revId cannot both be 0 or null' ); + } + + // This method recalls itself with READ_LATEST if READ_NORMAL doesn't get us a Title + // So ignore READ_LATEST_IMMUTABLE flags and handle the fallback logic in this method + if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST_IMMUTABLE ) ) { + $queryFlags = self::READ_NORMAL; + } + + $canUseTitleNewFromId = ( $pageId !== null && $pageId > 0 && $this->wikiId === false ); + list( $dbMode, $dbOptions ) = DBAccessObjectUtils::getDBOptions( $queryFlags ); + $titleFlags = ( $dbMode == DB_MASTER ? Title::GAID_FOR_UPDATE : 0 ); + + // Loading by ID is best, but Title::newFromID does not support that for foreign IDs. 
+ if ( $canUseTitleNewFromId ) { + // TODO: better foreign title handling (introduce TitleFactory) + $title = Title::newFromID( $pageId, $titleFlags ); + if ( $title ) { + return $title; + } + } + + // rev_id is defined as NOT NULL, but this revision may not yet have been inserted. + $canUseRevId = ( $revId !== null && $revId > 0 ); + + if ( $canUseRevId ) { + $dbr = $this->getDBConnectionRef( $dbMode ); + // @todo: Title::getSelectFields(), or Title::getQueryInfo(), or something like that + $row = $dbr->selectRow( + [ 'revision', 'page' ], + [ + 'page_namespace', + 'page_title', + 'page_id', + 'page_latest', + 'page_is_redirect', + 'page_len', + ], + [ 'rev_id' => $revId ], + __METHOD__, + $dbOptions, + [ 'page' => [ 'JOIN', 'page_id=rev_page' ] ] + ); + if ( $row ) { + // TODO: better foreign title handling (introduce TitleFactory) + return Title::newFromRow( $row ); + } + } + + // If we still don't have a title, fallback to master if that wasn't already happening. + if ( $dbMode !== DB_MASTER ) { + $title = $this->getTitle( $pageId, $revId, self::READ_LATEST ); + if ( $title ) { + $this->logger->info( + __METHOD__ . ' fell back to READ_LATEST and got a Title.', + [ 'trace' => wfBacktrace() ] + ); + return $title; + } + } + + throw new RevisionAccessException( + "Could not determine title for page ID $pageId and revision ID $revId" + ); + } + + /** + * @param mixed $value + * @param string $name + * + * @throw IncompleteRevisionException if $value is null + * @return mixed $value, if $value is not null + */ + private function failOnNull( $value, $name ) { + if ( $value === null ) { + throw new IncompleteRevisionException( + "$name must not be " . var_export( $value, true ) . "!" 
+ ); + } + + return $value; + } + + /** + * @param mixed $value + * @param string $name + * + * @throw IncompleteRevisionException if $value is empty + * @return mixed $value, if $value is not null + */ + private function failOnEmpty( $value, $name ) { + if ( $value === null || $value === 0 || $value === '' ) { + throw new IncompleteRevisionException( + "$name must not be " . var_export( $value, true ) . "!" + ); + } + + return $value; + } + + /** + * Insert a new revision into the database, returning the new revision record + * on success and dies horribly on failure. + * + * MCR migration note: this replaces Revision::insertOn + * + * @param RevisionRecord $rev + * @param IDatabase $dbw (master connection) + * + * @throws InvalidArgumentException + * @return RevisionRecord the new revision record. + */ + public function insertRevisionOn( RevisionRecord $rev, IDatabase $dbw ) { + // TODO: pass in a DBTransactionContext instead of a database connection. + $this->checkDatabaseWikiId( $dbw ); + + if ( !$rev->getSlotRoles() ) { + throw new InvalidArgumentException( 'At least one slot needs to be defined!' ); + } + + if ( $rev->getSlotRoles() !== [ 'main' ] ) { + throw new InvalidArgumentException( 'Only the main slot is supported for now!' ); + } + + // TODO: we shouldn't need an actual Title here. + $title = Title::newFromLinkTarget( $rev->getPageAsLinkTarget() ); + $pageId = $this->failOnEmpty( $rev->getPageId(), 'rev_page field' ); // check this early + + $parentId = $rev->getParentId() === null + ? 
$this->getPreviousRevisionId( $dbw, $rev ) + : $rev->getParentId(); + + // Record the text (or external storage URL) to the blob store + $slot = $rev->getSlot( 'main', RevisionRecord::RAW ); + + $size = $this->failOnNull( $rev->getSize(), 'size field' ); + $sha1 = $this->failOnEmpty( $rev->getSha1(), 'sha1 field' ); + + if ( !$slot->hasAddress() ) { + $content = $slot->getContent(); + $format = $content->getDefaultFormat(); + $model = $content->getModel(); + + $this->checkContentModel( $content, $title ); + + $data = $content->serialize( $format ); + + // Hints allow the blob store to optimize by "leaking" application level information to it. + // TODO: with the new MCR storage schema, we rev_id have this before storing the blobs. + // When we have it, add rev_id as a hint. Can be used with rev_parent_id for + // differential storage or compression of subsequent revisions. + $blobHints = [ + BlobStore::DESIGNATION_HINT => 'page-content', // BlobStore may be used for other things too. + BlobStore::PAGE_HINT => $pageId, + BlobStore::ROLE_HINT => $slot->getRole(), + BlobStore::PARENT_HINT => $parentId, + BlobStore::SHA1_HINT => $slot->getSha1(), + BlobStore::MODEL_HINT => $model, + BlobStore::FORMAT_HINT => $format, + ]; + + $blobAddress = $this->blobStore->storeBlob( $data, $blobHints ); + } else { + $blobAddress = $slot->getAddress(); + $model = $slot->getModel(); + $format = $slot->getFormat(); + } + + $textId = $this->blobStore->getTextIdFromAddress( $blobAddress ); + + if ( !$textId ) { + throw new LogicException( + 'Blob address not supported in 1.29 database schema: ' . $blobAddress + ); + } + + // getTextIdFromAddress() is free to insert something into the text table, so $textId + // may be a new value, not anything already contained in $blobAddress. + $blobAddress = 'tt:' . 
$textId; + + $comment = $this->failOnNull( $rev->getComment( RevisionRecord::RAW ), 'comment' ); + $user = $this->failOnNull( $rev->getUser( RevisionRecord::RAW ), 'user' ); + $timestamp = $this->failOnEmpty( $rev->getTimestamp(), 'timestamp field' ); + + // Checks. + $this->failOnNull( $user->getId(), 'user field' ); + $this->failOnEmpty( $user->getName(), 'user_text field' ); + + # Record the edit in revisions + $row = [ + 'rev_page' => $pageId, + 'rev_parent_id' => $parentId, + 'rev_text_id' => $textId, + 'rev_minor_edit' => $rev->isMinor() ? 1 : 0, + 'rev_timestamp' => $dbw->timestamp( $timestamp ), + 'rev_deleted' => $rev->getVisibility(), + 'rev_len' => $size, + 'rev_sha1' => $sha1, + ]; + + if ( $rev->getId() !== null ) { + // Needed to restore revisions with their original ID + $row['rev_id'] = $rev->getId(); + } + + list( $commentFields, $commentCallback ) = + $this->commentStore->insertWithTempTable( $dbw, 'rev_comment', $comment ); + $row += $commentFields; + + list( $actorFields, $actorCallback ) = + $this->actorMigration->getInsertValuesWithTempTable( $dbw, 'rev_user', $user ); + $row += $actorFields; + + if ( $this->contentHandlerUseDB ) { + // MCR migration note: rev_content_model and rev_content_format will go away + + $defaultModel = ContentHandler::getDefaultModelFor( $title ); + $defaultFormat = ContentHandler::getForModelID( $defaultModel )->getDefaultFormat(); + + $row['rev_content_model'] = ( $model === $defaultModel ) ? null : $model; + $row['rev_content_format'] = ( $format === $defaultFormat ) ? null : $format; + } + + $dbw->insert( 'revision', $row, __METHOD__ ); + + if ( !isset( $row['rev_id'] ) ) { + // only if auto-increment was used + $row['rev_id'] = intval( $dbw->insertId() ); + } + $commentCallback( $row['rev_id'] ); + $actorCallback( $row['rev_id'], $row ); + + // Insert IP revision into ip_changes for use when querying for a range. 
+ if ( $user->getId() === 0 && IP::isValid( $user->getName() ) ) { + $ipcRow = [ + 'ipc_rev_id' => $row['rev_id'], + 'ipc_rev_timestamp' => $row['rev_timestamp'], + 'ipc_hex' => IP::toHex( $user->getName() ), + ]; + $dbw->insert( 'ip_changes', $ipcRow, __METHOD__ ); + } + + $newSlot = SlotRecord::newSaved( $row['rev_id'], $textId, $blobAddress, $slot ); + $slots = new RevisionSlots( [ 'main' => $newSlot ] ); + + $rev = new RevisionStoreRecord( + $title, + $user, + $comment, + (object)$row, + $slots, + $this->wikiId + ); + + $newSlot = $rev->getSlot( 'main', RevisionRecord::RAW ); + + // sanity checks + Assert::postcondition( $rev->getId() > 0, 'revision must have an ID' ); + Assert::postcondition( $rev->getPageId() > 0, 'revision must have a page ID' ); + Assert::postcondition( + $rev->getComment( RevisionRecord::RAW ) !== null, + 'revision must have a comment' + ); + Assert::postcondition( + $rev->getUser( RevisionRecord::RAW ) !== null, + 'revision must have a user' + ); + + Assert::postcondition( $newSlot !== null, 'revision must have a main slot' ); + Assert::postcondition( + $newSlot->getAddress() !== null, + 'main slot must have an addess' + ); + + Hooks::run( 'RevisionRecordInserted', [ $rev ] ); + + return $rev; + } + + /** + * MCR migration note: this corresponds to Revision::checkContentModel + * + * @param Content $content + * @param Title $title + * + * @throws MWException + * @throws MWUnknownContentModelException + */ + private function checkContentModel( Content $content, Title $title ) { + // Note: may return null for revisions that have not yet been inserted + + $model = $content->getModel(); + $format = $content->getDefaultFormat(); + $handler = $content->getContentHandler(); + + $name = "$title"; + + if ( !$handler->isSupportedFormat( $format ) ) { + throw new MWException( "Can't use format $format with content model $model on $name" ); + } + + if ( !$this->contentHandlerUseDB ) { + // if $wgContentHandlerUseDB is not set, + // all revisions must 
use the default content model and format. + + $defaultModel = ContentHandler::getDefaultModelFor( $title ); + $defaultHandler = ContentHandler::getForModelID( $defaultModel ); + $defaultFormat = $defaultHandler->getDefaultFormat(); + + if ( $model != $defaultModel ) { + throw new MWException( "Can't save non-default content model with " + . "\$wgContentHandlerUseDB disabled: model is $model, " + . "default for $name is $defaultModel" + ); + } + + if ( $format != $defaultFormat ) { + throw new MWException( "Can't use non-default content format with " + . "\$wgContentHandlerUseDB disabled: format is $format, " + . "default for $name is $defaultFormat" + ); + } + } + + if ( !$content->isValid() ) { + throw new MWException( + "New content for $name is not valid! Content model is $model" + ); + } + } + + /** + * Create a new null-revision for insertion into a page's + * history. This will not re-save the text, but simply refer + * to the text from the previous version. + * + * Such revisions can for instance identify page rename + * operations and other such meta-modifications. + * + * MCR migration note: this replaces Revision::newNullRevision + * + * @todo Introduce newFromParentRevision(). newNullRevision can then be based on that + * (or go away). 
+ * + * @param IDatabase $dbw + * @param Title $title Title of the page to read from + * @param CommentStoreComment $comment RevisionRecord's summary + * @param bool $minor Whether the revision should be considered as minor + * @param User $user The user to attribute the revision to + * @return RevisionRecord|null RevisionRecord or null on error + */ + public function newNullRevision( + IDatabase $dbw, + Title $title, + CommentStoreComment $comment, + $minor, + User $user + ) { + $this->checkDatabaseWikiId( $dbw ); + + $fields = [ 'page_latest', 'page_namespace', 'page_title', + 'rev_id', 'rev_text_id', 'rev_len', 'rev_sha1' ]; + + if ( $this->contentHandlerUseDB ) { + $fields[] = 'rev_content_model'; + $fields[] = 'rev_content_format'; + } + + $current = $dbw->selectRow( + [ 'page', 'revision' ], + $fields, + [ + 'page_id' => $title->getArticleID(), + 'page_latest=rev_id', + ], + __METHOD__, + [ 'FOR UPDATE' ] // T51581 + ); + + if ( $current ) { + $fields = [ + 'page' => $title->getArticleID(), + 'user_text' => $user->getName(), + 'user' => $user->getId(), + 'actor' => $user->getActorId(), + 'comment' => $comment, + 'minor_edit' => $minor, + 'text_id' => $current->rev_text_id, + 'parent_id' => $current->page_latest, + 'slot_origin' => $current->page_latest, + 'len' => $current->rev_len, + 'sha1' => $current->rev_sha1 + ]; + + if ( $this->contentHandlerUseDB ) { + $fields['content_model'] = $current->rev_content_model; + $fields['content_format'] = $current->rev_content_format; + } + + $fields['title'] = Title::makeTitle( $current->page_namespace, $current->page_title ); + + $mainSlot = $this->emulateMainSlot_1_29( $fields, self::READ_LATEST, $title ); + $revision = new MutableRevisionRecord( $title, $this->wikiId ); + $this->initializeMutableRevisionFromArray( $revision, $fields ); + $revision->setSlot( $mainSlot ); + } else { + $revision = null; + } + + return $revision; + } + + /** + * MCR migration note: this replaces Revision::isUnpatrolled + * + * @todo This 
is overly specific, so move or kill this method. + * + * @param RevisionRecord $rev + * + * @return int Rcid of the unpatrolled row, zero if there isn't one + */ + public function getRcIdIfUnpatrolled( RevisionRecord $rev ) { + $rc = $this->getRecentChange( $rev ); + if ( $rc && $rc->getAttribute( 'rc_patrolled' ) == RecentChange::PRC_UNPATROLLED ) { + return $rc->getAttribute( 'rc_id' ); + } else { + return 0; + } + } + + /** + * Get the RC object belonging to the current revision, if there's one + * + * MCR migration note: this replaces Revision::getRecentChange + * + * @todo move this somewhere else? + * + * @param RevisionRecord $rev + * @param int $flags (optional) $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master + * + * @return null|RecentChange + */ + public function getRecentChange( RevisionRecord $rev, $flags = 0 ) { + $dbr = $this->getDBConnection( DB_REPLICA ); + + list( $dbType, ) = DBAccessObjectUtils::getDBOptions( $flags ); + + $userIdentity = $rev->getUser( RevisionRecord::RAW ); + + if ( !$userIdentity ) { + // If the revision has no user identity, chances are it never went + // into the database, and doesn't have an RC entry. + return null; + } + + // TODO: Select by rc_this_oldid alone - but as of Nov 2017, there is no index on that! + $actorWhere = $this->actorMigration->getWhere( $dbr, 'rc_user', $rev->getUser(), false ); + $rc = RecentChange::newFromConds( + [ + $actorWhere['conds'], + 'rc_timestamp' => $dbr->timestamp( $rev->getTimestamp() ), + 'rc_this_oldid' => $rev->getId() + ], + __METHOD__, + $dbType + ); + + $this->releaseDBConnection( $dbr ); + + // XXX: cache this locally? Glue it to the RevisionRecord? + return $rc; + } + + /** + * Maps fields of the archive row to corresponding revision rows. + * + * @param object $archiveRow + * + * @return object a revision row object, corresponding to $archiveRow. 
+ */ + private static function mapArchiveFields( $archiveRow ) { + $fieldMap = [ + // keep with ar prefix: + 'ar_id' => 'ar_id', + + // not the same suffix: + 'ar_page_id' => 'rev_page', + 'ar_rev_id' => 'rev_id', + + // same suffix: + 'ar_text_id' => 'rev_text_id', + 'ar_timestamp' => 'rev_timestamp', + 'ar_user_text' => 'rev_user_text', + 'ar_user' => 'rev_user', + 'ar_actor' => 'rev_actor', + 'ar_minor_edit' => 'rev_minor_edit', + 'ar_deleted' => 'rev_deleted', + 'ar_len' => 'rev_len', + 'ar_parent_id' => 'rev_parent_id', + 'ar_sha1' => 'rev_sha1', + 'ar_comment' => 'rev_comment', + 'ar_comment_cid' => 'rev_comment_cid', + 'ar_comment_id' => 'rev_comment_id', + 'ar_comment_text' => 'rev_comment_text', + 'ar_comment_data' => 'rev_comment_data', + 'ar_comment_old' => 'rev_comment_old', + 'ar_content_format' => 'rev_content_format', + 'ar_content_model' => 'rev_content_model', + ]; + + $revRow = new stdClass(); + foreach ( $fieldMap as $arKey => $revKey ) { + if ( property_exists( $archiveRow, $arKey ) ) { + $revRow->$revKey = $archiveRow->$arKey; + } + } + + return $revRow; + } + + /** + * Constructs a RevisionRecord for the revisions main slot, based on the MW1.29 schema. + * + * @param object|array $row Either a database row or an array + * @param int $queryFlags for callbacks + * @param Title $title + * + * @return SlotRecord The main slot, extracted from the MW 1.29 style row. 
+ * @throws MWException + */ + private function emulateMainSlot_1_29( $row, $queryFlags, Title $title ) { + $mainSlotRow = new stdClass(); + $mainSlotRow->role_name = 'main'; + $mainSlotRow->model_name = null; + $mainSlotRow->slot_revision_id = null; + $mainSlotRow->content_address = null; + $mainSlotRow->slot_content_id = null; + + $content = null; + $blobData = null; + $blobFlags = null; + + if ( is_object( $row ) ) { + // archive row + if ( !isset( $row->rev_id ) && ( isset( $row->ar_user ) || isset( $row->ar_actor ) ) ) { + $row = $this->mapArchiveFields( $row ); + } + + if ( isset( $row->rev_text_id ) && $row->rev_text_id > 0 ) { + $mainSlotRow->slot_content_id = $row->rev_text_id; + $mainSlotRow->content_address = 'tt:' . $row->rev_text_id; + } + + // This is used by null-revisions + $mainSlotRow->slot_origin = isset( $row->slot_origin ) + ? intval( $row->slot_origin ) + : null; + + if ( isset( $row->old_text ) ) { + // this happens when the text-table gets joined directly, in the pre-1.30 schema + $blobData = isset( $row->old_text ) ? strval( $row->old_text ) : null; + // Check against selects that might have not included old_flags + if ( !property_exists( $row, 'old_flags' ) ) { + throw new InvalidArgumentException( 'old_flags was not set in $row' ); + } + $blobFlags = ( $row->old_flags === null ) ? '' : $row->old_flags; + } + + $mainSlotRow->slot_revision_id = intval( $row->rev_id ); + + $mainSlotRow->content_size = isset( $row->rev_len ) ? intval( $row->rev_len ) : null; + $mainSlotRow->content_sha1 = isset( $row->rev_sha1 ) ? strval( $row->rev_sha1 ) : null; + $mainSlotRow->model_name = isset( $row->rev_content_model ) + ? strval( $row->rev_content_model ) + : null; + // XXX: in the future, we'll probably always use the default format, and drop content_format + $mainSlotRow->format_name = isset( $row->rev_content_format ) + ? 
strval( $row->rev_content_format ) + : null; + } elseif ( is_array( $row ) ) { + $mainSlotRow->slot_revision_id = isset( $row['id'] ) ? intval( $row['id'] ) : null; + + $mainSlotRow->slot_content_id = isset( $row['text_id'] ) + ? intval( $row['text_id'] ) + : null; + $mainSlotRow->slot_origin = isset( $row['slot_origin'] ) + ? intval( $row['slot_origin'] ) + : null; + $mainSlotRow->content_address = isset( $row['text_id'] ) + ? 'tt:' . intval( $row['text_id'] ) + : null; + $mainSlotRow->content_size = isset( $row['len'] ) ? intval( $row['len'] ) : null; + $mainSlotRow->content_sha1 = isset( $row['sha1'] ) ? strval( $row['sha1'] ) : null; + + $mainSlotRow->model_name = isset( $row['content_model'] ) + ? strval( $row['content_model'] ) : null; // XXX: must be a string! + // XXX: in the future, we'll probably always use the default format, and drop content_format + $mainSlotRow->format_name = isset( $row['content_format'] ) + ? strval( $row['content_format'] ) : null; + $blobData = isset( $row['text'] ) ? rtrim( strval( $row['text'] ) ) : null; + // XXX: If the flags field is not set then $blobFlags should be null so that no + // decoding will happen. An empty string will result in default decodings. + $blobFlags = isset( $row['flags'] ) ? trim( strval( $row['flags'] ) ) : null; + + // if we have a Content object, override mText and mContentModel + if ( !empty( $row['content'] ) ) { + if ( !( $row['content'] instanceof Content ) ) { + throw new MWException( 'content field must contain a Content object.' ); + } + + /** @var Content $content */ + $content = $row['content']; + $handler = $content->getContentHandler(); + + $mainSlotRow->model_name = $content->getModel(); + + // XXX: in the future, we'll probably always use the default format. + if ( $mainSlotRow->format_name === null ) { + $mainSlotRow->format_name = $handler->getDefaultFormat(); + } + } + } else { + throw new MWException( 'Revision constructor passed invalid row format.' 
); + } + + // With the old schema, the content changes with every revision, + // except for null-revisions. + if ( !isset( $mainSlotRow->slot_origin ) ) { + $mainSlotRow->slot_origin = $mainSlotRow->slot_revision_id; + } + + if ( $mainSlotRow->model_name === null ) { + $mainSlotRow->model_name = function ( SlotRecord $slot ) use ( $title ) { + // TODO: MCR: consider slot role in getDefaultModelFor()! Use LinkTarget! + // TODO: MCR: deprecate $title->getModel(). + return ContentHandler::getDefaultModelFor( $title ); + }; + } + + if ( !$content ) { + $content = function ( SlotRecord $slot ) + use ( $blobData, $blobFlags, $queryFlags, $mainSlotRow ) + { + return $this->loadSlotContent( + $slot, + $blobData, + $blobFlags, + $mainSlotRow->format_name, + $queryFlags + ); + }; + } + + $mainSlotRow->slot_id = $mainSlotRow->slot_revision_id; + return new SlotRecord( $mainSlotRow, $content ); + } + + /** + * Loads a Content object based on a slot row. + * + * This method does not call $slot->getContent(), and may be used as a callback + * called by $slot->getContent(). + * + * MCR migration note: this roughly corresponds to Revision::getContentInternal + * + * @param SlotRecord $slot The SlotRecord to load content for + * @param string|null $blobData The content blob, in the form indicated by $blobFlags + * @param string|null $blobFlags Flags indicating how $blobData needs to be processed. + * Use null if no processing should happen. That is in constrast to the empty string, + * which causes the blob to be decoded according to the configured legacy encoding. 
+ * @param string|null $blobFormat MIME type indicating how $dataBlob is encoded + * @param int $queryFlags + * + * @throws RevisionAccessException + * @return Content + */
	private function loadSlotContent(
		SlotRecord $slot,
		$blobData = null,
		$blobFlags = null,
		$blobFormat = null,
		$queryFlags = 0
	) {
		if ( $blobData === null ) {
			// No inline blob was handed in: fetch it from the blob store via the slot's address.
			$address = $slot->getAddress();
			try {
				$data = $this->blobStore->getBlob( $address, $queryFlags );
			} catch ( BlobAccessException $e ) {
				throw new RevisionAccessException(
					"Failed to load data blob from $address: " . $e->getMessage(), 0, $e
				);
			}
		} else {
			Assert::parameterType( 'string', $blobData, '$blobData' );
			Assert::parameterType( 'string|null', $blobFlags, '$blobFlags' );

			// The slot address, when there is one, is handed to expandBlob() as its cache key.
			$cacheKey = $slot->hasAddress() ? $slot->getAddress() : null;

			if ( $blobFlags !== null ) {
				$data = $this->blobStore->expandBlob( $blobData, $blobFlags, $cacheKey );
				if ( $data === false ) {
					throw new RevisionAccessException(
						"Failed to expand blob data using flags $blobFlags (key: $cacheKey)"
					);
				}
			} else {
				// Null flags: use the blob exactly as given, without any expansion.
				$data = $blobData;
			}
		}

		// Unserialize with the handler registered for the slot's content model.
		return ContentHandler::getForModelID( $slot->getModel() )
			->unserializeContent( $data, $blobFormat );
	}

+ /** + * Load a page revision from a given revision ID number. + * Returns null if no such revision can be found.
+ * + * MCR migration note: this replaces Revision::newFromId + * + * $flags include: + * IDBAccessObject::READ_LATEST: Select the data from the master + * IDBAccessObject::READ_LOCKING : Select & lock the data from the master + * + * @param int $id + * @param int $flags (optional) + * @return RevisionRecord|null + */
	public function getRevisionById( $id, $flags = 0 ) {
		return $this->newRevisionFromConds( [ 'rev_id' => intval( $id ) ], $flags );
	}

	/**
	 * Load either the current, or a specified, revision
	 * that's attached to a given link target. If not attached
	 * to that link target, will return null.
	 *
	 * MCR migration note: this replaces Revision::newFromTitle
	 *
	 * $flags include:
	 *      IDBAccessObject::READ_LATEST: Select the data from the master
	 *      IDBAccessObject::READ_LOCKING : Select & lock the data from the master
	 *
	 * @param LinkTarget $linkTarget
	 * @param int $revId (optional)
	 * @param int $flags Bitfield (optional)
	 * @return RevisionRecord|null
	 */
	public function getRevisionByTitle( LinkTarget $linkTarget, $revId = 0, $flags = 0 ) {
		$conds = [
			'page_namespace' => $linkTarget->getNamespace(),
			'page_title' => $linkTarget->getDBkey()
		];
		if ( $revId ) {
			// Use the specified revision ID.
			// Note that we use newRevisionFromConds here because we want to retry
			// and fall back to master if the page is not found on a replica.
			// Since the caller supplied a revision ID, we are pretty sure the revision is
			// supposed to exist, so we should try hard to find it.
			$conds['rev_id'] = $revId;
			return $this->newRevisionFromConds( $conds, $flags );
		}

		// Use a join to get the latest revision.
		// Note that we don't use newRevisionFromConds here because we don't want to retry
		// and fall back to master. The assumption is that we only want to force the fallback
		// if we are quite sure the revision exists because the caller supplied a revision ID.
		// If the page isn't found at all on a replica, it probably simply does not exist.
		$db = $this->getDBConnection( ( $flags & self::READ_LATEST ) ? DB_MASTER : DB_REPLICA );
		try {
			$conds[] = 'rev_id=page_latest';
			return $this->loadRevisionFromConds( $db, $conds, $flags );
		} finally {
			// Hand the connection back even when the lookup throws.
			$this->releaseDBConnection( $db );
		}
	}

	/**
	 * Load either the current, or a specified, revision
	 * that's attached to a given page ID.
	 * Returns null if no such revision can be found.
	 *
	 * MCR migration note: this replaces Revision::newFromPageId
	 *
	 * $flags include:
	 *      IDBAccessObject::READ_LATEST: Select the data from the master (since 1.20)
	 *      IDBAccessObject::READ_LOCKING : Select & lock the data from the master
	 *
	 * @param int $pageId
	 * @param int $revId (optional)
	 * @param int $flags Bitfield (optional)
	 * @return RevisionRecord|null
	 */
	public function getRevisionByPageId( $pageId, $revId = 0, $flags = 0 ) {
		$conds = [ 'page_id' => $pageId ];
		if ( $revId ) {
			// Use the specified revision ID.
			// Note that we use newRevisionFromConds here because we want to retry
			// and fall back to master if the page is not found on a replica.
			// Since the caller supplied a revision ID, we are pretty sure the revision is
			// supposed to exist, so we should try hard to find it.
			$conds['rev_id'] = $revId;
			return $this->newRevisionFromConds( $conds, $flags );
		}

		// Use a join to get the latest revision.
		// Note that we don't use newRevisionFromConds here because we don't want to retry
		// and fall back to master. The assumption is that we only want to force the fallback
		// if we are quite sure the revision exists because the caller supplied a revision ID.
		// If the page isn't found at all on a replica, it probably simply does not exist.
		$db = $this->getDBConnection( ( $flags & self::READ_LATEST ) ? DB_MASTER : DB_REPLICA );
		try {
			$conds[] = 'rev_id=page_latest';
			return $this->loadRevisionFromConds( $db, $conds, $flags );
		} finally {
			// Hand the connection back even when the lookup throws.
			$this->releaseDBConnection( $db );
		}
	}

	/**
	 * Load the revision for the given title with the given timestamp.
	 * WARNING: Timestamps may in some circumstances not be unique,
	 * so this isn't the best key to use.
	 *
	 * MCR migration note: this replaces Revision::loadFromTimestamp
	 *
	 * @param Title $title
	 * @param string $timestamp
	 * @return RevisionRecord|null
	 */
	public function getRevisionByTimestamp( $title, $timestamp ) {
		// The connection is only needed to format the timestamp for the database;
		// newRevisionFromConds() acquires its own connection, so release this one right away.
		$db = $this->getDBConnection( DB_REPLICA );
		try {
			$dbTimestamp = $db->timestamp( $timestamp );
		} finally {
			$this->releaseDBConnection( $db );
		}

		return $this->newRevisionFromConds(
			[
				'rev_timestamp' => $dbTimestamp,
				'page_namespace' => $title->getNamespace(),
				'page_title' => $title->getDBkey()
			],
			0,
			$title
		);
	}

	/**
	 * Make a fake revision object from an archive table row. This is queried
	 * for permissions or even inserted (as in Special:Undelete)
	 *
	 * MCR migration note: this replaces Revision::newFromArchiveRow
	 *
	 * @param object $row
	 * @param int $queryFlags
	 * @param Title|null $title
	 * @param array $overrides associative array with fields of $row to override. This may be
	 *   used e.g. to force the parent revision ID or page ID. Keys in the array are fields
	 *   names from the archive table without the 'ar_' prefix, i.e. use 'parent_id' to
	 *   override ar_parent_id.
	 *
	 * @return RevisionRecord
	 * @throws MWException
	 */
	public function newRevisionFromArchiveRow(
		$row,
		$queryFlags = 0,
		Title $title = null,
		array $overrides = []
	) {
		Assert::parameterType( 'object', $row, '$row' );

		// check second argument, since Revision::newFromArchiveRow had $overrides in that spot.
		Assert::parameterType( 'integer', $queryFlags, '$queryFlags' );

		if ( !$title && isset( $overrides['title'] ) ) {
			if ( !( $overrides['title'] instanceof Title ) ) {
				throw new MWException( 'title field override must contain a Title object.' );
			}

			$title = $overrides['title'];
		}

		if ( !isset( $title ) ) {
			if ( isset( $row->ar_namespace ) && isset( $row->ar_title ) ) {
				$title = Title::makeTitle( $row->ar_namespace, $row->ar_title );
			} else {
				throw new InvalidArgumentException(
					'A Title or ar_namespace and ar_title must be given'
				);
			}
		}

		// Write every override onto the row under its ar_-prefixed field name.
		foreach ( $overrides as $key => $value ) {
			$field = "ar_$key";
			$row->$field = $value;
		}

		try {
			$user = User::newFromAnyId(
				isset( $row->ar_user ) ? $row->ar_user : null,
				isset( $row->ar_user_text ) ? $row->ar_user_text : null,
				isset( $row->ar_actor ) ? $row->ar_actor : null
			);
		} catch ( InvalidArgumentException $ex ) {
			// Fall back to an empty user identity rather than failing the whole lookup.
			wfWarn( __METHOD__ . ': ' . $ex->getMessage() );
			$user = new UserIdentityValue( 0, '', 0 );
		}

		$db = $this->getDBConnection( DB_REPLICA );
		try {
			// Legacy because $row may have come from self::selectFields()
			$comment = $this->commentStore->getCommentLegacy( $db, 'ar_comment', $row, true );
		} finally {
			// Previously this connection was acquired inline and never released.
			$this->releaseDBConnection( $db );
		}

		$mainSlot = $this->emulateMainSlot_1_29( $row, $queryFlags, $title );
		$slots = new RevisionSlots( [ 'main' => $mainSlot ] );

		return new RevisionArchiveRecord( $title, $user, $comment, $row, $slots, $this->wikiId );
	}

	/**
	 * @see RevisionFactory::newRevisionFromRow_1_29
	 *
	 * MCR migration note: this replaces Revision::newFromRow
	 *
	 * @param object $row
	 * @param int $queryFlags
	 * @param Title|null $title
	 *
	 * @return RevisionRecord
	 * @throws MWException
	 * @throws RevisionAccessException
	 */
	private function newRevisionFromRow_1_29( $row, $queryFlags = 0, Title $title = null ) {
		Assert::parameterType( 'object', $row, '$row' );

		if ( !$title ) {
			$pageId = isset( $row->rev_page ) ? $row->rev_page : 0; // XXX: also check page_id?
			$revId = isset( $row->rev_id ) ? $row->rev_id : 0;

			$title = $this->getTitle( $pageId, $revId, $queryFlags );
		}

		if ( !isset( $row->page_latest ) ) {
			$row->page_latest = $title->getLatestRevID();
			if ( $row->page_latest === 0 && $title->exists() ) {
				wfWarn( 'Encountered title object in limbo: ID ' . $title->getArticleID() );
			}
		}

		try {
			$user = User::newFromAnyId(
				isset( $row->rev_user ) ? $row->rev_user : null,
				isset( $row->rev_user_text ) ? $row->rev_user_text : null,
				isset( $row->rev_actor ) ? $row->rev_actor : null
			);
		} catch ( InvalidArgumentException $ex ) {
			// Fall back to an empty user identity rather than failing the whole lookup.
			wfWarn( __METHOD__ . ': ' . $ex->getMessage() );
			$user = new UserIdentityValue( 0, '', 0 );
		}

		$db = $this->getDBConnection( DB_REPLICA );
		try {
			// Legacy because $row may have come from self::selectFields()
			$comment = $this->commentStore->getCommentLegacy( $db, 'rev_comment', $row, true );
		} finally {
			// Previously this connection was acquired inline and never released.
			$this->releaseDBConnection( $db );
		}

		$mainSlot = $this->emulateMainSlot_1_29( $row, $queryFlags, $title );
		$slots = new RevisionSlots( [ 'main' => $mainSlot ] );

		return new RevisionStoreRecord( $title, $user, $comment, $row, $slots, $this->wikiId );
	}

	/**
	 * @see RevisionFactory::newRevisionFromRow
	 *
	 * MCR migration note: this replaces Revision::newFromRow
	 *
	 * @param object $row
	 * @param int $queryFlags
	 * @param Title|null $title
	 *
	 * @return RevisionRecord
	 */
	public function newRevisionFromRow( $row, $queryFlags = 0, Title $title = null ) {
		return $this->newRevisionFromRow_1_29( $row, $queryFlags, $title );
	}

+ /** + * Constructs a new MutableRevisionRecord based on the given associative array following + * the MW1.29 convention for the Revision constructor.
+ * + * MCR migration note: this replaces Revision::newFromRow + * + * @param array $fields + * @param int $queryFlags + * @param Title|null $title + * + * @return MutableRevisionRecord + * @throws MWException + * @throws RevisionAccessException + */
	public function newMutableRevisionFromArray(
		array $fields,
		$queryFlags = 0,
		Title $title = null
	) {
		if ( !$title ) {
			if ( isset( $fields['title'] ) ) {
				// An explicit title in the field array wins over a lookup by page/revision ID.
				if ( !( $fields['title'] instanceof Title ) ) {
					throw new MWException( 'title field must contain a Title object.' );
				}
				$title = $fields['title'];
			} else {
				$pageId = isset( $fields['page'] ) ? $fields['page'] : 0;
				$revId = isset( $fields['id'] ) ? $fields['id'] : 0;

				$title = $this->getTitle( $pageId, $revId, $queryFlags );
			}
		}

		if ( !isset( $fields['page'] ) ) {
			$fields['page'] = $title->getArticleID( $queryFlags );
		}

		// Validate a supplied content object; it cannot be combined with a stored text ID.
		if ( !empty( $fields['content'] ) ) {
			if ( !( $fields['content'] instanceof Content ) ) {
				throw new MWException( 'content field must contain a Content object.' );
			}

			if ( !empty( $fields['text_id'] ) ) {
				throw new MWException(
					"Text already stored in external store (id {$fields['text_id']}), " .
					"can't serialize content object"
				);
			}
		}

		// Normalize the comment to a CommentStoreComment. Message objects are passed
		// through as-is, anything else is cast to a trimmed string first.
		$needsCommentWrapping = isset( $fields['comment'] )
			&& !( $fields['comment'] instanceof CommentStoreComment );
		if ( $needsCommentWrapping ) {
			$commentData = isset( $fields['comment_data'] ) ? $fields['comment_data'] : null;

			$rawComment = ( $fields['comment'] instanceof Message )
				? $fields['comment']
				: trim( strval( $fields['comment'] ) );

			$fields['comment'] = CommentStoreComment::newUnsavedComment( $rawComment, $commentData );
		}

		$mainSlot = $this->emulateMainSlot_1_29( $fields, $queryFlags, $title );

		$revision = new MutableRevisionRecord( $title, $this->wikiId );
		$this->initializeMutableRevisionFromArray( $revision, $fields );
		$revision->setSlot( $mainSlot );

		return $revision;
	}

	/**
	 * Copies the fields present in $fields onto the given mutable revision record.
	 *
	 * @param MutableRevisionRecord $record
	 * @param array $fields
	 */
	private function initializeMutableRevisionFromArray(
		MutableRevisionRecord $record,
		array $fields
	) {
		/** @var UserIdentity $user */
		$user = null;

		$hasUserIdentity = isset( $fields['user'] ) && ( $fields['user'] instanceof UserIdentity );
		if ( $hasUserIdentity ) {
			$user = $fields['user'];
		} else {
			try {
				$user = User::newFromAnyId(
					isset( $fields['user'] ) ? $fields['user'] : null,
					isset( $fields['user_text'] ) ? $fields['user_text'] : null,
					isset( $fields['actor'] ) ? $fields['actor'] : null
				);
			} catch ( InvalidArgumentException $ex ) {
				// No usable user information: leave the record's user unset.
				$user = null;
			}
		}

		if ( $user ) {
			$record->setUser( $user );
		}

		if ( isset( $fields['timestamp'] ) ) {
			$timestamp = strval( $fields['timestamp'] );
		} else {
			// TODO: use a callback, so we can override it for testing.
			$timestamp = wfTimestampNow();
		}
		$record->setTimestamp( $timestamp );

		if ( isset( $fields['page'] ) ) {
			$record->setPageId( intval( $fields['page'] ) );
		}
		if ( isset( $fields['id'] ) ) {
			$record->setId( intval( $fields['id'] ) );
		}
		if ( isset( $fields['parent_id'] ) ) {
			$record->setParentId( intval( $fields['parent_id'] ) );
		}
		if ( isset( $fields['sha1'] ) ) {
			$record->setSha1( $fields['sha1'] );
		}
		if ( isset( $fields['size'] ) ) {
			$record->setSize( intval( $fields['size'] ) );
		}
		if ( isset( $fields['minor_edit'] ) ) {
			$record->setMinorEdit( intval( $fields['minor_edit'] ) !== 0 );
		}
		if ( isset( $fields['deleted'] ) ) {
			$record->setVisibility( intval( $fields['deleted'] ) );
		}

		if ( isset( $fields['comment'] ) ) {
			// By this point the comment must already be a CommentStoreComment.
			Assert::parameterType(
				CommentStoreComment::class,
				$fields['comment'],
				'$row[\'comment\']'
			);
			$record->setComment( $fields['comment'] );
		}
	}

	/**
	 * Load a page revision from a given revision ID number.
	 * Returns null if no such revision can be found.
	 *
	 * MCR migration note: this corresponds to Revision::loadFromId
	 *
	 * @note direct use is deprecated!
	 * @todo remove when unused! there seem to be no callers of Revision::loadFromId
	 *
	 * @param IDatabase $db
	 * @param int $id
	 *
	 * @return RevisionRecord|null
	 */
	public function loadRevisionFromId( IDatabase $db, $id ) {
		$conds = [ 'rev_id' => intval( $id ) ];
		return $this->loadRevisionFromConds( $db, $conds );
	}

+ /** + * Load either the current, or a specified, revision + * that's attached to a given page. If not attached + * to that page, will return null. + * + * MCR migration note: this replaces Revision::loadFromPageId + * + * @note direct use is deprecated! + * @todo remove when unused!
+ * + * @param IDatabase $db + * @param int $pageid + * @param int $id + * @return RevisionRecord|null + */
	public function loadRevisionFromPageId( IDatabase $db, $pageid, $id = 0 ) {
		$pageIdValue = intval( $pageid );
		$conds = [ 'rev_page' => $pageIdValue, 'page_id' => $pageIdValue ];

		if ( !$id ) {
			// No revision ID given: match the page's latest revision.
			$conds[] = 'rev_id=page_latest';
		} else {
			$conds['rev_id'] = intval( $id );
		}

		return $this->loadRevisionFromConds( $db, $conds );
	}

	/**
	 * Load either the current, or a specified, revision
	 * that's attached to a given page. If not attached
	 * to that page, will return null.
	 *
	 * MCR migration note: this replaces Revision::loadFromTitle
	 *
	 * @note direct use is deprecated!
	 * @todo remove when unused!
	 *
	 * @param IDatabase $db
	 * @param Title $title
	 * @param int $id
	 *
	 * @return RevisionRecord|null
	 */
	public function loadRevisionFromTitle( IDatabase $db, $title, $id = 0 ) {
		// Either a concrete revision ID, or the page_latest column for "current revision".
		$matchId = $id ? intval( $id ) : 'page_latest';

		$conds = [
			"rev_id=$matchId",
			'page_namespace' => $title->getNamespace(),
			'page_title' => $title->getDBkey()
		];

		return $this->loadRevisionFromConds( $db, $conds, 0, $title );
	}

+ /** + * Load the revision for the given title with the given timestamp. + * WARNING: Timestamps may in some circumstances not be unique, + * so this isn't the best key to use. + * + * MCR migration note: this replaces Revision::loadFromTimestamp + * + * @note direct use is deprecated! Use getRevisionByTimestamp instead! + * @todo remove when unused!
+ * + * @param IDatabase $db + * @param Title $title + * @param string $timestamp + * @return RevisionRecord|null + */
	public function loadRevisionFromTimestamp( IDatabase $db, $title, $timestamp ) {
		$conds = [
			'rev_timestamp' => $db->timestamp( $timestamp ),
			'page_namespace' => $title->getNamespace(),
			'page_title' => $title->getDBkey()
		];

		return $this->loadRevisionFromConds( $db, $conds, 0, $title );
	}

	/**
	 * Given a set of conditions, fetch a revision
	 *
	 * This method should be used if we are pretty sure the revision exists.
	 * Unless $flags has READ_LATEST set, this method will first try to find the revision
	 * on a replica before hitting the master database.
	 *
	 * MCR migration note: this corresponds to Revision::newFromConds
	 *
	 * @param array $conditions
	 * @param int $flags (optional)
	 * @param Title $title
	 *
	 * @return RevisionRecord|null
	 */
	private function newRevisionFromConds( $conditions, $flags = 0, Title $title = null ) {
		$wantsLatest = (bool)( $flags & self::READ_LATEST );

		$db = $this->getDBConnection( $wantsLatest ? DB_MASTER : DB_REPLICA );
		$rev = $this->loadRevisionFromConds( $db, $conditions, $flags, $title );
		$this->releaseDBConnection( $db );

		$lb = $this->getDBLoadBalancer();

		// Make sure new pending/committed revisions are visible later on
		// within web requests, to avoid certain bugs like T93866 and T94407.
		$retryOnMaster = !$rev
			&& !$wantsLatest
			&& $lb->getServerCount() > 1
			&& $lb->hasOrMadeRecentMasterChanges();

		if ( $retryOnMaster ) {
			$flags = self::READ_LATEST;
			$db = $this->getDBConnection( DB_MASTER );
			$rev = $this->loadRevisionFromConds( $db, $conditions, $flags, $title );
			$this->releaseDBConnection( $db );
		}

		return $rev;
	}

+ /** + * Given a set of conditions, fetch a revision from + * the given database connection.
+ * + * MCR migration note: this corresponds to Revision::loadFromConds + * + * @param IDatabase $db + * @param array $conditions + * @param int $flags (optional) + * @param Title $title + * + * @return RevisionRecord|null + */
	private function loadRevisionFromConds(
		IDatabase $db,
		$conditions,
		$flags = 0,
		Title $title = null
	) {
		$row = $this->fetchRevisionRowFromConds( $db, $conditions, $flags );
		if ( !$row ) {
			// Nothing matched the conditions.
			return null;
		}

		return $this->newRevisionFromRow( $row, $flags, $title );
	}

	/**
	 * Throws an exception if the given database connection does not belong to the wiki this
	 * RevisionStore is bound to.
	 *
	 * The IDs are compared three times, each time after a further normalization step;
	 * the first match ends the check.
	 *
	 * @param IDatabase $db
	 * @throws MWException
	 */
	private function checkDatabaseWikiId( IDatabase $db ) {
		$storeWiki = $this->wikiId;
		$dbWiki = $db->getDomainID();

		if ( $dbWiki !== $storeWiki ) {
			// XXX: we really want the default database ID...
			$storeWiki = $storeWiki ?: wfWikiID();
			$dbWiki = $dbWiki ?: wfWikiID();

			if ( $dbWiki !== $storeWiki ) {
				// HACK: counteract encoding imposed by DatabaseDomain
				$storeWiki = str_replace( '?h', '-', $storeWiki );
				$dbWiki = str_replace( '?h', '-', $dbWiki );

				if ( $dbWiki !== $storeWiki ) {
					throw new MWException( "RevisionStore for $storeWiki "
						. "cannot be used with a DB connection for $dbWiki" );
				}
			}
		}
	}

+ /** + * Given a set of conditions, return a row with the + * fields necessary to build RevisionRecord objects.
+ * + * MCR migration note: this corresponds to Revision::fetchFromConds + * + * @param IDatabase $db + * @param array $conditions + * @param int $flags (optional) + * + * @return object|false data row as a raw object + */
	private function fetchRevisionRowFromConds( IDatabase $db, $conditions, $flags = 0 ) {
		$this->checkDatabaseWikiId( $db );

		$revQuery = self::getQueryInfo( [ 'page', 'user' ] );

		// Only lock the row when every READ_LOCKING bit is set in $flags.
		$lockRows = ( $flags & self::READ_LOCKING ) == self::READ_LOCKING;
		$options = $lockRows ? [ 'FOR UPDATE' ] : [];

		return $db->selectRow(
			$revQuery['tables'],
			$revQuery['fields'],
			$conditions,
			__METHOD__,
			$options,
			$revQuery['joins']
		);
	}

	/**
	 * Return the tables, fields, and join conditions to be selected to create
	 * a new revision object.
	 *
	 * MCR migration note: this replaces Revision::getQueryInfo
	 *
	 * @since 1.31
	 *
	 * @param array $options Any combination of the following strings
	 *  - 'page': Join with the page table, and select fields to identify the page
	 *  - 'user': Join with the user table, and select the user name
	 *  - 'text': Join with the text table, and select fields to load page text
	 *
	 * @return array With three keys:
	 *  - tables: (string[]) to include in the `$table` to `IDatabase->select()`
	 *  - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
	 *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
	 */
	public function getQueryInfo( $options = [] ) {
		// Base query: the revision table and its own columns.
		$ret = [
			'tables' => [ 'revision' ],
			'fields' => [
				'rev_id',
				'rev_page',
				'rev_text_id',
				'rev_timestamp',
				'rev_minor_edit',
				'rev_deleted',
				'rev_len',
				'rev_parent_id',
				'rev_sha1',
			],
			'joins' => [],
		];

		// Fold in whatever the comment store and the actor migration need.
		$commentQuery = $this->commentStore->getJoin( 'rev_comment' );
		$actorQuery = $this->actorMigration->getJoin( 'rev_user' );
		foreach ( [ $commentQuery, $actorQuery ] as $subQuery ) {
			foreach ( [ 'tables', 'fields', 'joins' ] as $part ) {
				$ret[$part] = array_merge( $ret[$part], $subQuery[$part] );
			}
		}

		if ( $this->contentHandlerUseDB ) {
			$ret['fields'][] = 'rev_content_format';
			$ret['fields'][] = 'rev_content_model';
		}

		$wantsPage = in_array( 'page', $options, true );
		$wantsUser = in_array( 'user', $options, true );
		$wantsText = in_array( 'text', $options, true );

		if ( $wantsPage ) {
			$ret['tables'][] = 'page';
			$ret['fields'] = array_merge( $ret['fields'], [
				'page_namespace',
				'page_title',
				'page_id',
				'page_latest',
				'page_is_redirect',
				'page_len',
			] );
			$ret['joins']['page'] = [ 'INNER JOIN', [ 'page_id = rev_page' ] ];
		}

		if ( $wantsUser ) {
			$ret['tables'][] = 'user';
			$ret['fields'] = array_merge( $ret['fields'], [
				'user_name',
			] );
			// The user ID expression comes from the actor migration's field map.
			$u = $actorQuery['fields']['rev_user'];
			$ret['joins']['user'] = [ 'LEFT JOIN', [ "$u != 0", "user_id = $u" ] ];
		}

		if ( $wantsText ) {
			$ret['tables'][] = 'text';
			$ret['fields'] = array_merge( $ret['fields'], [
				'old_text',
				'old_flags'
			] );
			$ret['joins']['text'] = [ 'INNER JOIN', [ 'rev_text_id=old_id' ] ];
		}

		return $ret;
	}

+ /** + * Return the tables, fields, and join conditions to be selected to create + * a new archived revision object.
+ * + * MCR migration note: this replaces Revision::getArchiveQueryInfo + * + * @since 1.31 + * + * @return array With three keys: + * - tables: (string[]) to include in the `$table` to `IDatabase->select()` + * - fields: (string[]) to include in the `$vars` to `IDatabase->select()` + * - joins: (array) to include in the `$join_conds` to `IDatabase->select()` + */
	public function getArchiveQueryInfo() {
		$commentQuery = $this->commentStore->getJoin( 'ar_comment' );
		$actorQuery = $this->actorMigration->getJoin( 'ar_user' );

		// NOTE: `+` is array union, so an entry from the comment/actor queries is only added
		// when its key is not already present on the left-hand side.
		// NOTE(review): this presumably relies on those queries using string keys - confirm.
		$ret = [
			'tables' => [ 'archive' ] + $commentQuery['tables'] + $actorQuery['tables'],
			'fields' => [
				'ar_id',
				'ar_page_id',
				'ar_namespace',
				'ar_title',
				'ar_rev_id',
				'ar_text_id',
				'ar_timestamp',
				'ar_minor_edit',
				'ar_deleted',
				'ar_len',
				'ar_parent_id',
				'ar_sha1',
			] + $commentQuery['fields'] + $actorQuery['fields'],
			'joins' => $commentQuery['joins'] + $actorQuery['joins'],
		];

		if ( $this->contentHandlerUseDB ) {
			$ret['fields'][] = 'ar_content_format';
			$ret['fields'][] = 'ar_content_model';
		}

		return $ret;
	}

	/**
	 * Do a batched query for the sizes of a set of revisions.
	 *
	 * MCR migration note: this replaces Revision::getParentLengths
	 *
	 * @param int[] $revIds
	 * @return int[] associative array mapping revision IDs from $revIds to the nominal size
	 *         of the corresponding revision.
	 */
	public function getRevisionSizes( array $revIds ) {
		$db = $this->getDBConnection( DB_REPLICA );
		try {
			return $this->listRevisionSizes( $db, $revIds );
		} finally {
			// Previously the connection was acquired inline and never released.
			$this->releaseDBConnection( $db );
		}
	}

+ /** + * Do a batched query for the sizes of a set of revisions. + * + * MCR migration note: this replaces Revision::getParentLengths + * + * @deprecated use RevisionStore::getRevisionSizes instead. + * + * @param IDatabase $db + * @param int[] $revIds + * @return int[] associative array mapping revision IDs from $revIds to the nominal size + * of the corresponding revision.
+ */
	public function listRevisionSizes( IDatabase $db, array $revIds ) {
		$this->checkDatabaseWikiId( $db );

		if ( !$revIds ) {
			// Nothing to look up.
			return [];
		}

		$res = $db->select(
			'revision',
			[ 'rev_id', 'rev_len' ],
			[ 'rev_id' => $revIds ],
			__METHOD__
		);

		$sizesById = [];
		foreach ( $res as $row ) {
			$sizesById[$row->rev_id] = intval( $row->rev_len );
		}

		return $sizesById;
	}

	/**
	 * Get previous revision for this title
	 *
	 * MCR migration note: this replaces Revision::getPrevious
	 *
	 * @param RevisionRecord $rev
	 * @param Title $title if known (optional)
	 *
	 * @return RevisionRecord|null
	 */
	public function getPreviousRevision( RevisionRecord $rev, Title $title = null ) {
		if ( $title === null ) {
			$title = $this->getTitle( $rev->getPageId(), $rev->getId() );
		}

		$prevId = $title->getPreviousRevisionID( $rev->getId() );

		return $prevId ? $this->getRevisionByTitle( $title, $prevId ) : null;
	}

	/**
	 * Get next revision for this title
	 *
	 * MCR migration note: this replaces Revision::getNext
	 *
	 * @param RevisionRecord $rev
	 * @param Title $title if known (optional)
	 *
	 * @return RevisionRecord|null
	 */
	public function getNextRevision( RevisionRecord $rev, Title $title = null ) {
		if ( $title === null ) {
			$title = $this->getTitle( $rev->getPageId(), $rev->getId() );
		}

		$nextId = $title->getNextRevisionID( $rev->getId() );

		return $nextId ? $this->getRevisionByTitle( $title, $nextId ) : null;
	}

	/**
	 * Get previous revision Id for this page_id
	 * This is used to populate rev_parent_id on save
	 *
	 * MCR migration note: this corresponds to Revision::getPreviousRevisionId
	 *
	 * @param IDatabase $db
	 * @param RevisionRecord $rev
	 *
	 * @return int
	 */
	private function getPreviousRevisionId( IDatabase $db, RevisionRecord $rev ) {
		$this->checkDatabaseWikiId( $db );

		if ( $rev->getPageId() === null ) {
			return 0;
		}

		if ( $rev->getId() ) {
			// Highest revision ID on the same page that is lower than this revision's ID.
			$prevId = $db->selectField(
				'revision', 'rev_id',
				[ 'rev_page' => $rev->getPageId(), 'rev_id < ' . $rev->getId() ],
				__METHOD__,
				[ 'ORDER BY' => 'rev_id DESC' ]
			);
		} else {
			# Use page_latest if ID is not given
			$prevId = $db->selectField(
				'page', 'page_latest',
				[ 'page_id' => $rev->getPageId() ],
				__METHOD__
			);
		}

		return intval( $prevId );
	}

	/**
	 * Get rev_timestamp from rev_id, without loading the rest of the row
	 *
	 * MCR migration note: this replaces Revision::getTimestampFromId
	 *
	 * @param Title $title
	 * @param int $id
	 * @param int $flags
	 * @return string|bool False if not found
	 */
	public function getTimestampFromId( $title, $id, $flags = 0 ) {
		$dbIndex = ( $flags & IDBAccessObject::READ_LATEST ) ? DB_MASTER : DB_REPLICA;
		$db = $this->getDBConnection( $dbIndex );

		// Require the revision to belong to the given title's page.
		$conds = [
			'rev_id' => $id,
			'rev_page' => $title->getArticleID(),
		];
		$timestamp = $db->selectField( 'revision', 'rev_timestamp', $conds, __METHOD__ );

		$this->releaseDBConnection( $db );

		if ( $timestamp === false ) {
			return false;
		}
		return wfTimestamp( TS_MW, $timestamp );
	}

	/**
	 * Get count of revisions per page...not very efficient
	 *
	 * MCR migration note: this replaces Revision::countByPageId
	 *
	 * @param IDatabase $db
	 * @param int $id Page id
	 * @return int
	 */
	public function countRevisionsByPageId( IDatabase $db, $id ) {
		$this->checkDatabaseWikiId( $db );

		$row = $db->selectRow( 'revision',
			[ 'revCount' => 'COUNT(*)' ],
			[ 'rev_page' => $id ],
			__METHOD__
		);

		return $row ? intval( $row->revCount ) : 0;
	}

	/**
	 * Get count of revisions per page...not very efficient
	 *
	 * MCR migration note: this replaces Revision::countByTitle
	 *
	 * @param IDatabase $db
	 * @param Title $title
	 * @return int
	 */
	public function countRevisionsByTitle( IDatabase $db, $title ) {
		$pageId = $title->getArticleID();

		// A falsy article ID yields 0 without touching the database.
		return $pageId ? $this->countRevisionsByPageId( $db, $pageId ) : 0;
	}

+ /** + * Check if no edits were made by other users since + * the time a user started editing the page. Limit to + * 50 revisions for the sake of performance. + * + * MCR migration note: this replaces Revision::userWasLastToEdit + * + * @deprecated since 1.31; Can possibly be removed, since the self-conflict suppression + * logic in EditPage that uses this seems conceptually dubious. Revision::userWasLastToEdit + * has been deprecated since 1.24. + * + * @param IDatabase $db The Database to perform the check on.
+ * @param int $pageId The ID of the page in question + * @param int $userId The ID of the user in question + * @param string $since Look at edits since this time + * + * @return bool True if the given user was the only one to edit since the given timestamp + */
	public function userWasLastToEdit( IDatabase $db, $pageId, $userId, $since ) {
		$this->checkDatabaseWikiId( $db );

		if ( !$userId ) {
			return false;
		}

		$revQuery = self::getQueryInfo();
		$res = $db->select(
			$revQuery['tables'],
			[
				'rev_user' => $revQuery['fields']['rev_user'],
			],
			[
				'rev_page' => $pageId,
				'rev_timestamp > ' . $db->addQuotes( $db->timestamp( $since ) )
			],
			__METHOD__,
			[ 'ORDER BY' => 'rev_timestamp ASC', 'LIMIT' => 50 ],
			$revQuery['joins']
		);
		// Any revision in the window made by a different user means the answer is "no".
		foreach ( $res as $row ) {
			if ( $row->rev_user != $userId ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Load a revision based on a known page ID and current revision ID from the DB
	 *
	 * This method allows for the use of caching, though accessing anything that normally
	 * requires permission checks (aside from the text) will trigger a small DB lookup.
	 *
	 * MCR migration note: this replaces Revision::newKnownCurrent
	 *
	 * @param Title $title the associated page title
	 * @param int $revId current revision of this page. Defaults to $title->getLatestRevID()
	 *  when omitted or 0.
	 *
	 * @return RevisionRecord|bool Returns false if missing
	 */
	public function getKnownCurrentRevision( Title $title, $revId = 0 ) {
		$db = $this->getDBConnectionRef( DB_REPLICA );

		$pageId = $title->getArticleID();

		if ( !$pageId ) {
			return false;
		}

		if ( !$revId ) {
			$revId = $title->getLatestRevID();
		}

		if ( !$revId ) {
			wfWarn(
				'No latest revision known for page ' . $title->getPrefixedDBkey()
				. ' even though it exists with page ID ' . $pageId
			);
			return false;
		}

		$row = $this->cache->getWithSetCallback(
			// Page/rev IDs passed in from DB to reflect history merges
			$this->cache->makeGlobalKey( 'revision-row-1.29', $db->getDomainID(), $pageId, $revId ),
			WANObjectCache::TTL_WEEK,
			function ( $curValue, &$ttl, array &$setOpts ) use ( $db, $pageId, $revId ) {
				$setOpts += Database::getCacheSetOptions( $db );

				$conds = [
					'rev_page' => intval( $pageId ),
					'page_id' => intval( $pageId ),
					'rev_id' => intval( $revId ),
				];

				$row = $this->fetchRevisionRowFromConds( $db, $conds );
				return $row ?: false; // don't cache negatives
			}
		);

		// Reflect revision deletion and user renames
		if ( $row ) {
			return $this->newRevisionFromRow( $row, 0, $title );
		} else {
			return false;
		}
	}

+ // TODO: move relevant methods from Title here, e.g. getFirstRevision, isBigDeletion, etc. + +} diff --git a/www/wiki/includes/Storage/RevisionStoreRecord.php b/www/wiki/includes/Storage/RevisionStoreRecord.php new file mode 100644 index 00000000..d092f22e --- /dev/null +++ b/www/wiki/includes/Storage/RevisionStoreRecord.php @@ -0,0 +1,210 @@ +<?php +/** + * A RevisionRecord representing an existing revision persisted in the revision table. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use CommentStoreComment; +use InvalidArgumentException; +use MediaWiki\User\UserIdentity; +use Title; +use User; +use Wikimedia\Assert\Assert; + +/** + * A RevisionRecord representing an existing revision persisted in the revision table. + * RevisionStoreRecord has no optional fields, getters will never return null. + * + * @since 1.31 + */ +class RevisionStoreRecord extends RevisionRecord { + + /** @var bool */ + protected $mCurrent = false; + + /** + * @note Avoid calling this constructor directly. Use the appropriate methods + * in RevisionStore instead. + * + * @param Title $title The title of the page this Revision is associated with. + * @param UserIdentity $user + * @param CommentStoreComment $comment + * @param object $row A row from the revision table. Use RevisionStore::getQueryInfo() to build + * a query that yields the required fields. + * @param RevisionSlots $slots The slots of this revision. + * @param bool|string $wikiId the wiki ID of the site this Revision belongs to, + * or false for the local site. + */ + function __construct( + Title $title, + UserIdentity $user, + CommentStoreComment $comment, + $row, + RevisionSlots $slots, + $wikiId = false + ) { + parent::__construct( $title, $slots, $wikiId ); + Assert::parameterType( 'object', $row, '$row' ); + + $this->mId = intval( $row->rev_id ); + $this->mPageId = intval( $row->rev_page ); + $this->mComment = $comment; + + $timestamp = wfTimestamp( TS_MW, $row->rev_timestamp ); + Assert::parameter( is_string( $timestamp ), '$row->rev_timestamp', 'must be a valid timestamp' ); + + $this->mUser = $user; + $this->mMinorEdit = boolval( $row->rev_minor_edit ); + $this->mTimestamp = $timestamp; + $this->mDeleted = intval( $row->rev_deleted ); + + // NOTE: rev_parent_id = 0 indicates that there is no parent revision, while null + // indicates that the parent revision is unknown. 
As per MW 1.31, the database schema + // allows rev_parent_id to be NULL. + $this->mParentId = isset( $row->rev_parent_id ) ? intval( $row->rev_parent_id ) : null; + $this->mSize = isset( $row->rev_len ) ? intval( $row->rev_len ) : null; + $this->mSha1 = !empty( $row->rev_sha1 ) ? $row->rev_sha1 : null; + + // NOTE: we must not call $this->mTitle->getLatestRevID() here, since the state of + // page_latest may be in limbo during revision creation. In that case, calling + // $this->mTitle->getLatestRevID() would cause a bad value to be cached in the Title + // object. During page creation, that bad value would be 0. + if ( isset( $row->page_latest ) ) { + $this->mCurrent = ( $row->rev_id == $row->page_latest ); + } + + // sanity check + if ( + $this->mPageId && $this->mTitle->exists() + && $this->mPageId !== $this->mTitle->getArticleID() + ) { + throw new InvalidArgumentException( + 'The given Title does not belong to page ID ' . $this->mPageId . + ' but actually belongs to ' . $this->mTitle->getArticleID() + ); + } + } + + /** + * MCR migration note: this replaces Revision::isCurrent + * + * @return bool + */ + public function isCurrent() { + return $this->mCurrent; + } + + /** + * MCR migration note: this replaces Revision::isDeleted + * + * @param int $field One of DELETED_* bitfield constants + * + * @return bool + */ + public function isDeleted( $field ) { + if ( $this->isCurrent() && $field === self::DELETED_TEXT ) { + // Current revisions of pages cannot have the content hidden. Skipping this + // check is very useful for Parser as it fetches templates using newKnownCurrent(). + // Calling getVisibility() in that case triggers a verification database query. + return false; // no need to check + } + + return parent::isDeleted( $field ); + } + + protected function userCan( $field, User $user ) { + if ( $this->isCurrent() && $field === self::DELETED_TEXT ) { + // Current revisions of pages cannot have the content hidden. 
Skipping this + // check is very useful for Parser as it fetches templates using newKnownCurrent(). + // Calling getVisibility() in that case triggers a verification database query. + return true; // no need to check + } + + return parent::userCan( $field, $user ); + } + + /** + * @return int The revision id, never null. + */ + public function getId() { + // overwritten just to add a guarantee to the contract + return parent::getId(); + } + + /** + * @throws RevisionAccessException if the size was unknown and could not be calculated. + * @return int The nominal revision size, never null. May be computed on the fly. + */ + public function getSize() { + // If length is null, calculate and remember it (potentially SLOW!). + // This is for compatibility with old database rows that don't have the field set. + if ( $this->mSize === null ) { + $this->mSize = $this->mSlots->computeSize(); + } + + return $this->mSize; + } + + /** + * @throws RevisionAccessException if the hash was unknown and could not be calculated. + * @return string The revision hash, never null. May be computed on the fly. + */ + public function getSha1() { + // If hash is null, calculate it and remember (potentially SLOW!) + // This is for compatibility with old database rows that don't have the field set. + if ( $this->mSha1 === null ) { + $this->mSha1 = $this->mSlots->computeSha1(); + } + + return $this->mSha1; + } + + /** + * @param int $audience + * @param User|null $user + * + * @return UserIdentity|null The identity of the revision author, null if access is forbidden. + */ + public function getUser( $audience = self::FOR_PUBLIC, User $user = null ) { + // overwritten just to add a guarantee to the contract + return parent::getUser( $audience, $user ); + } + + /** + * @param int $audience + * @param User|null $user + * + * @return CommentStoreComment|null The revision comment, null if access is forbidden. 
+ */ + public function getComment( $audience = self::FOR_PUBLIC, User $user = null ) { + // overwritten just to add a guarantee to the contract + return parent::getComment( $audience, $user ); + } + + /** + * @return string timestamp, never null + */ + public function getTimestamp() { + // overwritten just to add a guarantee to the contract + return parent::getTimestamp(); + } + +} diff --git a/www/wiki/includes/Storage/SlotRecord.php b/www/wiki/includes/Storage/SlotRecord.php new file mode 100644 index 00000000..50d11005 --- /dev/null +++ b/www/wiki/includes/Storage/SlotRecord.php @@ -0,0 +1,568 @@ +<?php +/** + * Value object representing a content slot associated with a page revision. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +use Content; +use InvalidArgumentException; +use LogicException; +use OutOfBoundsException; +use Wikimedia\Assert\Assert; + +/** + * Value object representing a content slot associated with a page revision. + * SlotRecord provides direct access to a Content object. + * That access may be implemented through a callback. 
+ * + * @since 1.31 + */ +class SlotRecord { + + /** + * @var object database result row, as a raw object + */ + private $row; + + /** + * @var Content|callable + */ + private $content; + + /** + * Returns a new SlotRecord just like the given $slot, except that calling getContent() + * will fail with an exception. + * + * @param SlotRecord $slot + * + * @return SlotRecord + */ + public static function newWithSuppressedContent( SlotRecord $slot ) { + $row = $slot->row; + + return new SlotRecord( $row, function () { + throw new SuppressedDataException( 'Content suppressed!' ); + } ); + } + + /** + * Constructs a new SlotRecord from an existing SlotRecord, overriding some fields. + * The slot's content cannot be overwritten. + * + * @param SlotRecord $slot + * @param array $overrides + * + * @return SlotRecord + */ + private static function newDerived( SlotRecord $slot, array $overrides = [] ) { + $row = clone $slot->row; + $row->slot_id = null; // never copy the row ID! + + foreach ( $overrides as $key => $value ) { + $row->$key = $value; + } + + return new SlotRecord( $row, $slot->content ); + } + + /** + * Constructs a new SlotRecord for a new revision, inheriting the content of the given SlotRecord + * of a previous revision. + * + * Note that SlotRecords constructed this way are intended as prototypes, + * to be used with newSaved(). They are incomplete, so some getters such as + * getRevision() will fail. + * + * @param SlotRecord $slot + * + * @return SlotRecord + */ + public static function newInherited( SlotRecord $slot ) { + // Sanity check - we can't inherit from a Slot that's not attached to a revision. + $slot->getRevision(); + $slot->getOrigin(); + $slot->getAddress(); + + // NOTE: slot_origin and content_address are copied from $slot. + return self::newDerived( $slot, [ + 'slot_revision_id' => null, + ] ); + } + + /** + * Constructs a new Slot from a Content object for a new revision. 
+ * This is the preferred way to construct a slot for storing Content that + * resulted from a user edit. The slot is assumed to be not inherited. + * + * Note that SlotRecords constructed this way are intended as prototypes, + * to be used with newSaved(). They are incomplete, so some getters such as + * getAddress() will fail. + * + * @param string $role + * @param Content $content + * + * @return SlotRecord An incomplete proto-slot object, to be used with newSaved() later. + */ + public static function newUnsaved( $role, Content $content ) { + Assert::parameterType( 'string', $role, '$role' ); + + $row = [ + 'slot_id' => null, // not yet known + 'slot_revision_id' => null, // not yet known + 'slot_origin' => null, // not yet known, will be set in newSaved() + 'content_size' => null, // compute later + 'content_sha1' => null, // compute later + 'slot_content_id' => null, // not yet known, will be set in newSaved() + 'content_address' => null, // not yet known, will be set in newSaved() + 'role_name' => $role, + 'model_name' => $content->getModel(), + ]; + + return new SlotRecord( (object)$row, $content ); + } + + /** + * Constructs a complete SlotRecord for a newly saved revision, based on the incomplete + * proto-slot. This adds information that has only become available during saving, + * particularly the revision ID and content address. + * + * @param int $revisionId the revision the slot is to be associated with (field slot_revision_id). + * If $protoSlot already has a revision, it must be the same. + * @param int $contentId the ID of the row in the content table describing the content + * referenced by $contentAddress (field slot_content_id). + * If $protoSlot already has a content ID, it must be the same. + * @param string $contentAddress the slot's content address (field content_address). + * If $protoSlot already has an address, it must be the same. + * @param SlotRecord $protoSlot The proto-slot that was provided as input for creating a new + * revision. 
$protoSlot must have a content address if inherited. + * + * @throws LogicException If $protoSlot already belongs to a different revision, or already + * has a different content address or content ID. + * @throws InvalidArgumentException If $protoSlot is inherited but has no content address + * or no origin. + * + * @return SlotRecord The complete slot, associated with the given revision and content. + */ + public static function newSaved( + $revisionId, + $contentId, + $contentAddress, + SlotRecord $protoSlot + ) { + Assert::parameterType( 'integer', $revisionId, '$revisionId' ); + Assert::parameterType( 'integer', $contentId, '$contentId' ); + Assert::parameterType( 'string', $contentAddress, '$contentAddress' ); + + if ( $protoSlot->hasRevision() && $protoSlot->getRevision() !== $revisionId ) { + throw new LogicException( + "Mismatching revision ID $revisionId: " + . "The slot already belongs to revision {$protoSlot->getRevision()}. " + . "Use SlotRecord::newInherited() to re-use content between revisions." + ); + } + + if ( $protoSlot->hasAddress() && $protoSlot->getAddress() !== $contentAddress ) { + throw new LogicException( + "Mismatching blob address $contentAddress: " + . "The slot already has content at {$protoSlot->getAddress()}." + ); + } + + if ( $protoSlot->hasField( 'slot_content_id' ) && $protoSlot->getContentId() !== $contentId ) { + throw new LogicException( + "Mismatching content ID $contentId: " + . "The slot already has content row {$protoSlot->getContentId()} associated." + ); + } + + if ( $protoSlot->isInherited() ) { + if ( !$protoSlot->hasAddress() ) { + throw new InvalidArgumentException( + "An inherited blob should have a content address!" + ); + } + if ( !$protoSlot->hasField( 'slot_origin' ) ) { + throw new InvalidArgumentException( + "A saved inherited slot should have an origin set!" + ); + } + $origin = $protoSlot->getOrigin(); + } else { + $origin = $revisionId; + } + + return self::newDerived( $protoSlot, [ + 'slot_revision_id' => $revisionId, + 'slot_content_id' => $contentId, + 'slot_origin' => $origin, + 'content_address' => $contentAddress, + ] ); + } + + /** + * SlotRecord constructor. 
+ * + * The following fields are supported by the $row parameter: + * + * $row->blob_data + * $row->blob_address + * + * @param object $row A database row composed of fields of the slot and content tables, + * as a raw object. Any field value can be a callback that produces the field value + * given this SlotRecord as a parameter. However, plain strings cannot be used as + * callbacks here, for security reasons. + * @param Content|callable $content The content object associated with the slot, or a + * callback that will return that Content object, given this SlotRecord as a parameter. + */ + public function __construct( $row, $content ) { + Assert::parameterType( 'object', $row, '$row' ); + Assert::parameterType( 'Content|callable', $content, '$content' ); + + Assert::parameter( + property_exists( $row, 'slot_id' ), + '$row->slot_id', + 'must exist' + ); + Assert::parameter( + property_exists( $row, 'slot_revision_id' ), + '$row->slot_revision_id', + 'must exist' + ); + Assert::parameter( + property_exists( $row, 'slot_content_id' ), + '$row->slot_content_id', + 'must exist' + ); + Assert::parameter( + property_exists( $row, 'content_address' ), + '$row->content_address', + 'must exist' + ); + Assert::parameter( + property_exists( $row, 'model_name' ), + '$row->model_name', + 'must exist' + ); + Assert::parameter( + property_exists( $row, 'slot_origin' ), + '$row->slot_origin', + 'must exist' + ); + Assert::parameter( + !property_exists( $row, 'slot_inherited' ), + '$row->slot_inherited', + 'must not exist' + ); + Assert::parameter( + !property_exists( $row, 'slot_revision' ), + '$row->slot_revision', + 'must not exist' + ); + + $this->row = $row; + $this->content = $content; + } + + /** + * Implemented to defy serialization. + * + * @throws LogicException always + */ + public function __sleep() { + throw new LogicException( __CLASS__ . ' is not serializable.' ); + } + + /** + * Returns the Content of the given slot. 
+ * + * @note This is free to load Content from whatever subsystem is necessary, + * performing potentially expensive operations and triggering I/O-related + * failure modes. + * + * @note This method does not apply audience filtering. + * + * @throws SuppressedDataException if access to the content is not allowed according + * to the audience check performed by RevisionRecord::getSlot(). + * + * @return Content The slot's content. This is a direct reference to the internal instance, + * copy before exposing to application logic! + */ + public function getContent() { + if ( $this->content instanceof Content ) { + return $this->content; + } + + $obj = call_user_func( $this->content, $this ); + + Assert::postcondition( + $obj instanceof Content, + 'Slot content callback should return a Content object' + ); + + $this->content = $obj; + + return $this->content; + } + + /** + * Returns the string value of a data field from the database row supplied to the constructor. + * If the field was set to a callback, that callback is invoked and the result returned. + * + * @param string $name + * + * @throws OutOfBoundsException + * @throws IncompleteRevisionException + * @return mixed Returns the field's value, never null. + */ + private function getField( $name ) { + if ( !isset( $this->row->$name ) ) { + // distinguish between unknown and uninitialized fields + if ( property_exists( $this->row, $name ) ) { + throw new IncompleteRevisionException( 'Uninitialized field: ' . $name ); + } else { + throw new OutOfBoundsException( 'No such field: ' . $name ); + } + } + + $value = $this->row->$name; + + // NOTE: allow callbacks, but don't trust plain string callables from the database! + if ( !is_string( $value ) && is_callable( $value ) ) { + $value = call_user_func( $value, $this ); + $this->setField( $name, $value ); + } + + return $value; + } + + /** + * Returns the string value of a data field from the database row supplied to the constructor. 
+ * + * @param string $name + * + * @throws OutOfBoundsException + * @throws IncompleteRevisionException + * @return string Returns the string value + */ + private function getStringField( $name ) { + return strval( $this->getField( $name ) ); + } + + /** + * Returns the int value of a data field from the database row supplied to the constructor. + * + * @param string $name + * + * @throws OutOfBoundsException + * @throws IncompleteRevisionException + * @return int Returns the int value + */ + private function getIntField( $name ) { + return intval( $this->getField( $name ) ); + } + + /** + * @param string $name + * @return bool whether this record contains the given field with a non-null value + */ + private function hasField( $name ) { + return isset( $this->row->$name ); + } + + /** + * Returns the ID of the revision this slot is associated with. + * + * @return int + */ + public function getRevision() { + return $this->getIntField( 'slot_revision_id' ); + } + + /** + * Returns the revision ID of the revision that originated the slot's content. + * + * @return int + */ + public function getOrigin() { + return $this->getIntField( 'slot_origin' ); + } + + /** + * Whether this slot was inherited from an older revision. + * + * If this SlotRecord is already attached to a revision, this returns true + * if the slot's revision of origin is not the same as the revision it belongs to. + * + * If this SlotRecord is not yet attached to a revision, this returns true + * if the slot already has an address. + * + * @return bool + */ + public function isInherited() { + if ( $this->hasRevision() ) { + return $this->getRevision() !== $this->getOrigin(); + } else { + return $this->hasAddress(); + } + } + + /** + * Whether this slot has an address. Slots will have an address if their + * content has been stored. While building a new revision, + * SlotRecords will not have an address associated. 
+ * + * @return bool + */ + public function hasAddress() { + return $this->hasField( 'content_address' ); + } + + /** + * Whether this slot has a revision ID associated. Slots will have a revision ID associated + * only if they were loaded as part of an existing revision. While building a new revision, + * SlotRecords will not have a revision ID associated. + * + * @return bool + */ + public function hasRevision() { + return $this->hasField( 'slot_revision_id' ); + } + + /** + * Returns the role of the slot. + * + * @return string + */ + public function getRole() { + return $this->getStringField( 'role_name' ); + } + + /** + * Returns the address of this slot's content. + * This address can be used with BlobStore to load the Content object. + * + * @return string + */ + public function getAddress() { + return $this->getStringField( 'content_address' ); + } + + /** + * Returns the ID of the content meta data row associated with the slot. + * This information should be irrelevant to application logic, it is here to allow + * the construction of a full row for the revision table. + * + * @return int + */ + public function getContentId() { + return $this->getIntField( 'slot_content_id' ); + } + + /** + * Returns the content size + * + * @return int size of the content, in bogo-bytes, as reported by Content::getSize. + */ + public function getSize() { + try { + $size = $this->getIntField( 'content_size' ); + } catch ( IncompleteRevisionException $ex ) { + $size = $this->getContent()->getSize(); + $this->setField( 'content_size', $size ); + } + + return $size; + } + + /** + * Returns the content hash + * + * @return string hash of the content. + */ + public function getSha1() { + try { + $sha1 = $this->getStringField( 'content_sha1' ); + } catch ( IncompleteRevisionException $ex ) { + $format = $this->hasField( 'format_name' ) + ? 
$this->getStringField( 'format_name' ) + : null; + + $data = $this->getContent()->serialize( $format ); + $sha1 = self::base36Sha1( $data ); + $this->setField( 'content_sha1', $sha1 ); + } + + return $sha1; + } + + /** + * Returns the content model. This is the model name that decides + * which ContentHandler is appropriate for interpreting the + * data of the blob referenced by the address returned by getAddress(). + * + * @return string the content model of the content + */ + public function getModel() { + try { + $model = $this->getStringField( 'model_name' ); + } catch ( IncompleteRevisionException $ex ) { + $model = $this->getContent()->getModel(); + $this->setField( 'model_name', $model ); + } + + return $model; + } + + /** + * Returns the blob serialization format as a MIME type. + * + * @note When this method returns null, the caller is expected + * to auto-detect the serialization format, or to rely on + * the default format associated with the content model. + * + * @return string|null + */ + public function getFormat() { + // XXX: we currently do not plan to store the format for each slot! 
+ + if ( $this->hasField( 'format_name' ) ) { + return $this->getStringField( 'format_name' ); + } + + return null; + } + + /** + * @param string $name + * @param string|int|null $value + */ + private function setField( $name, $value ) { + $this->row->$name = $value; + } + + /** + * Get the base 36 SHA-1 value for a string of text + * + * MCR migration note: this replaces Revision::base36Sha1 + * + * @param string $blob + * @return string + */ + public static function base36Sha1( $blob ) { + return \Wikimedia\base_convert( sha1( $blob ), 16, 36, 31 ); + } + +} diff --git a/www/wiki/includes/Storage/SqlBlobStore.php b/www/wiki/includes/Storage/SqlBlobStore.php new file mode 100644 index 00000000..0ff7c133 --- /dev/null +++ b/www/wiki/includes/Storage/SqlBlobStore.php @@ -0,0 +1,600 @@ +<?php +/** + * Service for storing and loading data blobs representing revision content. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * Attribution notice: when this file was created, much of its content was taken + * from the Revision.php file as present in release 1.30. Refer to the history + * of that file for original authorship. 
+ * + * @file + */ + +namespace MediaWiki\Storage; + +use DBAccessObjectUtils; +use ExternalStore; +use IDBAccessObject; +use IExpiringStore; +use InvalidArgumentException; +use Language; +use MWException; +use WANObjectCache; +use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\LoadBalancer; + +/** + * Service for storing and loading Content objects. + * + * @since 1.31 + * + * @note This was written to act as a drop-in replacement for the corresponding + * static methods in Revision. + */ +class SqlBlobStore implements IDBAccessObject, BlobStore { + + // Note: the name has been taken unchanged from the Revision class. + const TEXT_CACHE_GROUP = 'revisiontext:10'; + + /** + * @var LoadBalancer + */ + private $dbLoadBalancer; + + /** + * @var WANObjectCache + */ + private $cache; + + /** + * @var bool|string Wiki ID + */ + private $wikiId; + + /** + * @var int + */ + private $cacheExpiry = 604800; // 7 days + + /** + * @var bool + */ + private $compressBlobs = false; + + /** + * @var bool|string + */ + private $legacyEncoding = false; + + /** + * @var Language|null + */ + private $legacyEncodingConversionLang = null; + + /** + * @var boolean + */ + private $useExternalStore = false; + + /** + * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections + * @param WANObjectCache $cache A cache manager for caching blobs + * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki. 
+ */ + public function __construct( + LoadBalancer $dbLoadBalancer, + WANObjectCache $cache, + $wikiId = false + ) { + $this->dbLoadBalancer = $dbLoadBalancer; + $this->cache = $cache; + $this->wikiId = $wikiId; + } + + /** + * @return int time for which blobs can be cached, in seconds + */ + public function getCacheExpiry() { + return $this->cacheExpiry; + } + + /** + * @param int $cacheExpiry time for which blobs can be cached, in seconds + */ + public function setCacheExpiry( $cacheExpiry ) { + Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' ); + + $this->cacheExpiry = $cacheExpiry; + } + + /** + * @return bool whether blobs should be compressed for storage + */ + public function getCompressBlobs() { + return $this->compressBlobs; + } + + /** + * @param bool $compressBlobs whether blobs should be compressed for storage + */ + public function setCompressBlobs( $compressBlobs ) { + $this->compressBlobs = $compressBlobs; + } + + /** + * @return false|string The legacy encoding to assume for blobs that are not marked as utf8. + * False means handling of legacy encoding is disabled, and utf8 assumed. + */ + public function getLegacyEncoding() { + return $this->legacyEncoding; + } + + /** + * @return Language|null The locale to use when decoding from a legacy encoding, or null + * if handling of legacy encoding is disabled. + */ + public function getLegacyEncodingConversionLang() { + return $this->legacyEncodingConversionLang; + } + + /** + * @param string $legacyEncoding The legacy encoding to assume for blobs that are + * not marked as utf8. + * @param Language $language The locale to use when decoding from a legacy encoding. 
+ */ + public function setLegacyEncoding( $legacyEncoding, Language $language ) { + Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' ); + + $this->legacyEncoding = $legacyEncoding; + $this->legacyEncodingConversionLang = $language; + } + + /** + * @return bool Whether to use the ExternalStore mechanism for storing blobs. + */ + public function getUseExternalStore() { + return $this->useExternalStore; + } + + /** + * @param bool $useExternalStore Whether to use the ExternalStore mechanism for storing blobs. + */ + public function setUseExternalStore( $useExternalStore ) { + Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' ); + + $this->useExternalStore = $useExternalStore; + } + + /** + * @return LoadBalancer + */ + private function getDBLoadBalancer() { + return $this->dbLoadBalancer; + } + + /** + * @param int $index A database index, like DB_MASTER or DB_REPLICA + * + * @return IDatabase + */ + private function getDBConnection( $index ) { + $lb = $this->getDBLoadBalancer(); + return $lb->getConnection( $index, [], $this->wikiId ); + } + + /** + * Stores an arbitrary blob of data and returns an address that can be used with + * getBlob() to retrieve the same blob of data, + * + * @param string $data + * @param array $hints An array of hints. + * + * @throws BlobAccessException + * @return string an address that can be used with getBlob() to retrieve the data. + */ + public function storeBlob( $data, $hints = [] ) { + try { + $flags = $this->compressData( $data ); + + # Write to external storage if required + if ( $this->useExternalStore ) { + // Store and get the URL + $data = ExternalStore::insertToDefault( $data ); + if ( !$data ) { + throw new BlobAccessException( "Failed to store text to external storage" ); + } + if ( $flags ) { + $flags .= ','; + } + $flags .= 'external'; + + // TODO: we could also return an address for the external store directly here. 
+ // That would mean bypassing the text table entirely when the external store is + // used. We'll need to assess expected fallout before doing that. + } + + $dbw = $this->getDBConnection( DB_MASTER ); + + $old_id = $dbw->nextSequenceValue( 'text_old_id_seq' ); + $dbw->insert( + 'text', + [ + 'old_id' => $old_id, + 'old_text' => $data, + 'old_flags' => $flags, + ], + __METHOD__ + ); + + $textId = $dbw->insertId(); + + return 'tt:' . $textId; + } catch ( MWException $e ) { + throw new BlobAccessException( $e->getMessage(), 0, $e ); + } + } + + /** + * Retrieve a blob, given an address. + * Currently hardcoded to the 'text' table storage engine. + * + * MCR migration note: this replaces Revision::loadText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throws BlobAccessException + * @return string + */ + public function getBlob( $blobAddress, $queryFlags = 0 ) { + Assert::parameterType( 'string', $blobAddress, '$blobAddress' ); + + // No negative caching; negative hits on text rows may be due to corrupted replica DBs + $blob = $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! + $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress ), + $this->getCacheTTL(), + function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) { + list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags ); + $setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) ); + + return $this->fetchBlob( $blobAddress, $queryFlags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ] + ); + + if ( $blob === false ) { + throw new BlobAccessException( 'Failed to load blob from address ' . 
$blobAddress ); + } + + return $blob; + } + + /** + * MCR migration note: this corresponds to Revision::fetchText + * + * @param string $blobAddress + * @param int $queryFlags + * + * @throws BlobAccessException if the address has an unknown schema or is malformed + * @return string|false The blob, or false if the text row is missing or holds bad data + */ + private function fetchBlob( $blobAddress, $queryFlags ) { + list( $schema, $id, ) = self::splitBlobAddress( $blobAddress ); + + //TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL! + if ( $schema === 'tt' ) { + $textId = intval( $id ); + } else { + // XXX: change to better exceptions! That makes migration more difficult, though. + throw new BlobAccessException( "Unknown blob address schema: $schema" ); + } + + if ( !$textId || $id !== (string)$textId ) { + // XXX: change to better exceptions! That makes migration more difficult, though. + throw new BlobAccessException( "Bad blob address: $blobAddress" ); + } + + // Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables + // do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases. + $queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) + ? self::READ_LATEST_IMMUTABLE + : 0; + + list( $index, $options, $fallbackIndex, $fallbackOptions ) = + DBAccessObjectUtils::getDBOptions( $queryFlags ); + + // Text data is immutable; check replica DBs first. + $row = $this->getDBConnection( $index )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $options + ); + + // Fallback to DB_MASTER in some cases if the row was not found, using the appropriate + // options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ. + if ( !$row && $fallbackIndex !== null ) { + $row = $this->getDBConnection( $fallbackIndex )->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $textId ], + __METHOD__, + $fallbackOptions + ); + } + + if ( !$row ) { + wfWarn( __METHOD__ . ": No text row with ID $textId." 
); + return false; + } + + $blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress ); + + if ( $blob === false ) { + wfWarn( __METHOD__ . ": Bad data in text row $textId." ); + return false; + } + + return $blob; + } + + /** + * Expand a raw data blob according to the flags given. + * + * MCR migration note: this replaces Revision::getRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param string $raw The raw blob data, to be processed according to $flags. + * May be the blob itself, or the blob compressed, or just the address + * of the actual blob, depending on $flags. + * @param string|string[] $flags Blob flags, such as 'external' or 'gzip', as an array or a comma-separated string. + * Note that not including 'utf-8' in $flags will cause the data to be decoded + * according to the legacy encoding specified via setLegacyEncoding. + * @param string|null $cacheKey Cache key component; only used for 'external' blobs, and only when $this->wikiId is false + * + * @return false|string The expanded blob or false on failure + */ + public function expandBlob( $raw, $flags, $cacheKey = null ) { + if ( is_string( $flags ) ) { + $flags = explode( ',', $flags ); + } + + // Use external methods for external objects, text in table is URL-only then + if ( in_array( 'external', $flags ) ) { + $url = $raw; + $parts = explode( '://', $url, 2 ); + if ( count( $parts ) == 1 || $parts[1] == '' ) { + return false; + } + + if ( $cacheKey && $this->wikiId === false ) { + // Make use of the wiki-local revision text cache. + // The cached value should be decompressed, so handle that and return here. + // NOTE: we rely on $this->cache being the right cache for $this->wikiId! + return $this->cache->getWithSetCallback( + // TODO: change key, since this is not necessarily revision text! 
+ $this->cache->makeKey( 'revisiontext', 'textid', $cacheKey ), + $this->getCacheTTL(), + function () use ( $url, $flags ) { + // No negative caching per BlobStore::getBlob() + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + + return $this->decompressData( $blob, $flags ); + }, + [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ] + ); + } else { + $blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] ); + return $this->decompressData( $blob, $flags ); + } + } else { + return $this->decompressData( $raw, $flags ); + } + } + + /** + * If compression is enabled ($this->compressBlobs), we will compress data. + * The input string is modified in place. + * Return value is the flags field: it always contains 'utf-8', + * and additionally 'gzip' if the data was compressed + * successfully. + * + * MCR migration note: this replaces Revision::compressRevisionText + * + * @note direct use is deprecated! + * @todo make this private, there should be no need to use this method outside this class. + * + * @param mixed &$blob Reference to a text; replaced by the deflated data if compression succeeds + * + * @return string Comma-separated blob flags, e.g. 'utf-8' or 'utf-8,gzip' + */ + public function compressData( &$blob ) { + $blobFlags = []; + + // Revisions not marked as UTF-8 will have legacy decoding applied by decompressData(). + // XXX: if $this->legacyEncoding is not set, we could skip this. That would however be + // risky, since $this->legacyEncoding being set in the future would lead to data corruption. + $blobFlags[] = 'utf-8'; + + if ( $this->compressBlobs ) { + if ( function_exists( 'gzdeflate' ) ) { + $deflated = gzdeflate( $blob ); + + if ( $deflated === false ) { + wfLogWarning( __METHOD__ . ': gzdeflate() failed' ); + } else { + $blob = $deflated; + $blobFlags[] = 'gzip'; + } + } else { + wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" ); + } + } + return implode( ',', $blobFlags ); + } + + /** + * Re-converts revision text according to its flags. 
+ * + * MCR migration note: this replaces Revision::decompressRevisionText + * + * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead. + * @todo make this private, there should be no need to use this method outside this class. + * + * @param mixed $blob The raw blob data, or false if it could not be fetched + * @param array $blobFlags Blob flags, such as 'gzip', 'object' or 'error'. + * Note that not including 'utf-8' in $blobFlags will cause the data to be decoded + * according to the legacy encoding specified via setLegacyEncoding. + * + * @return string|bool Decompressed text, or false on failure + */ + public function decompressData( $blob, array $blobFlags ) { + if ( $blob === false ) { + // Text failed to be fetched; nothing to do + return false; + } + + if ( in_array( 'error', $blobFlags ) ) { + // Error row, return false + return false; + } + + if ( in_array( 'gzip', $blobFlags ) ) { + # Deal with optional compression of archived pages. + # This can be done periodically via maintenance/compressOld.php, and + # as pages are saved if $wgCompressRevisions is set. + $blob = gzinflate( $blob ); + + if ( $blob === false ) { + wfLogWarning( __METHOD__ . ': gzinflate() failed' ); + return false; + } + } + + if ( in_array( 'object', $blobFlags ) ) { + # Generic compressed storage (NOTE(review): unserialize() runs on stored row data; confirm that source is trusted) + $obj = unserialize( $blob ); + if ( !is_object( $obj ) ) { + // Invalid object + return false; + } + $blob = $obj->getText(); + } + + // Needed to support old revisions left over from the 1.4 / 1.5 migration. + if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang + && !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags ) + ) { + # Old revisions kept around in a legacy encoding? + # Upconvert on demand. 
+ # ("utf8" checked for compatibility with some broken + # conversion scripts 2008-12-30) + $blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob ); + } + + return $blob; + } + + /** + * Get the text cache TTL + * + * MCR migration note: this replaces Revision::getCacheTTL + * + * @return int $this->cacheExpiry if set, otherwise (or for an SQL-emulated cache) WANObjectCache::TTL_UNCACHEABLE + */ + private function getCacheTTL() { + if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION ) + <= WANObjectCache::QOS_EMULATION_SQL + ) { + // Do not cache RDBMs blobs in...the RDBMs store + $ttl = WANObjectCache::TTL_UNCACHEABLE; + } else { + $ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE; + } + + return $ttl; + } + + /** + * Returns an ID corresponding to the old_id field in the text table, corresponding + * to the given $address. + * + * Currently, $address must start with 'tt:' followed by a decimal integer representing + * the old_id; if $address does not start with 'tt:', null is returned. However, + * the implementation may change to insert rows into the text table on the fly. + * + * @note This method exists for use with the text table based storage schema. + * It should not be assumed that it will function with all future kinds of content addresses. + * + * @deprecated since 1.31, do not assume that all blob addresses refer to a row in the text + * table. This method should become private once the relevant refactoring in WikiPage is + * complete. + * + * @param string $address Blob address; an InvalidArgumentException is thrown if it is malformed + * + * @return int|null The old_id, or null if $address does not use the 'tt' schema + */ + public function getTextIdFromAddress( $address ) { + list( $schema, $id, ) = self::splitBlobAddress( $address ); + + if ( $schema !== 'tt' ) { + return null; + } + + $textId = intval( $id ); + + if ( !$textId || $id !== (string)$textId ) { + throw new InvalidArgumentException( "Malformed text_id: $id" ); + } + + return $textId; + } + + /** + * Splits a blob address into three parts: the schema, the ID, and parameters/flags. 
+ * + * @param string $address Address of the form "schema:id", optionally followed by "?" and CGI-style parameters + * + * @throws InvalidArgumentException If $address does not match that form + * @return array [ $schema, $id, $parameters ], with $schema lowercased and $parameters being an assoc array. + */ + private static function splitBlobAddress( $address ) { + if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m ) ) { + throw new InvalidArgumentException( "Bad blob address: $address" ); + } + + $schema = strtolower( $m[1] ); + $id = $m[2]; + $parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : []; + + return [ $schema, $id, $parameters ]; + } + + public function isReadOnly() { + if ( $this->useExternalStore && ExternalStore::defaultStoresAreReadOnly() ) { + return true; + } + + return ( $this->getDBLoadBalancer()->getReadOnlyReason() !== false ); + } +} diff --git a/www/wiki/includes/Storage/SuppressedDataException.php b/www/wiki/includes/Storage/SuppressedDataException.php new file mode 100644 index 00000000..24f16a64 --- /dev/null +++ b/www/wiki/includes/Storage/SuppressedDataException.php @@ -0,0 +1,33 @@ +<?php +/** + * Exception representing an attempt to access suppressed data without permission. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Storage; + +/** + * Exception raised in response to an audience check when attempting to + * access suppressed information without permission. + * + * @since 1.31 + */ +class SuppressedDataException extends RevisionAccessException { + +} |