diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/content |
first commit
Diffstat (limited to 'www/wiki/includes/content')
17 files changed, 4652 insertions, 0 deletions
diff --git a/www/wiki/includes/content/AbstractContent.php b/www/wiki/includes/content/AbstractContent.php new file mode 100644 index 00000000..b21eadcd --- /dev/null +++ b/www/wiki/includes/content/AbstractContent.php @@ -0,0 +1,551 @@ +<?php +/** + * A content object represents page content, e.g. the text to show on a page. + * Content objects have no knowledge about how they relate to Wiki pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Base implementation for content objects. + * + * @ingroup Content + */ +abstract class AbstractContent implements Content { + /** + * Name of the content model this Content object represents. 
+ * Use with CONTENT_MODEL_XXX constants + * + * @since 1.21 + * + * @var string $model_id + */ + protected $model_id; + + /** + * @param string $modelId + * + * @since 1.21 + */ + public function __construct( $modelId = null ) { + $this->model_id = $modelId; + } + + /** + * @since 1.21 + * + * @see Content::getModel + * @return string + */ + public function getModel() { + return $this->model_id; + } + + /** + * @since 1.21 + * + * @param string $modelId The model to check + * + * @throws MWException If the provided ID is not the ID of the content model supported by this + * Content object. + */ + protected function checkModelID( $modelId ) { + if ( $modelId !== $this->model_id ) { + throw new MWException( + "Bad content model: " . + "expected {$this->model_id} " . + "but got $modelId." + ); + } + } + + /** + * @since 1.21 + * + * @see Content::getContentHandler + * @return ContentHandler + */ + public function getContentHandler() { + return ContentHandler::getForContent( $this ); + } + + /** + * @since 1.21 + * + * @see Content::getDefaultFormat + * @return string + */ + public function getDefaultFormat() { + return $this->getContentHandler()->getDefaultFormat(); + } + + /** + * @since 1.21 + * + * @see Content::getSupportedFormats + * @return string[] + */ + public function getSupportedFormats() { + return $this->getContentHandler()->getSupportedFormats(); + } + + /** + * @since 1.21 + * + * @param string $format + * + * @return bool + * + * @see Content::isSupportedFormat + */ + public function isSupportedFormat( $format ) { + if ( !$format ) { + return true; // this means "use the default" + } + + return $this->getContentHandler()->isSupportedFormat( $format ); + } + + /** + * @since 1.21 + * + * @param string $format The serialization format to check. + * + * @throws MWException If the format is not supported by this content handler. 
+ */ + protected function checkFormat( $format ) { + if ( !$this->isSupportedFormat( $format ) ) { + throw new MWException( + "Format $format is not supported for content model " . + $this->getModel() + ); + } + } + + /** + * @since 1.21 + * + * @param string $format + * + * @return string + * + * @see Content::serialize + */ + public function serialize( $format = null ) { + return $this->getContentHandler()->serializeContent( $this, $format ); + } + + /** + * @since 1.21 + * + * @return bool + * + * @see Content::isEmpty + */ + public function isEmpty() { + return $this->getSize() === 0; + } + + /** + * Subclasses may override this to implement (light weight) validation. + * + * @since 1.21 + * + * @return bool Always true. + * + * @see Content::isValid + */ + public function isValid() { + return true; + } + + /** + * @since 1.21 + * + * @param Content $that + * + * @return bool + * + * @see Content::equals + */ + public function equals( Content $that = null ) { + if ( is_null( $that ) ) { + return false; + } + + if ( $that === $this ) { + return true; + } + + if ( $that->getModel() !== $this->getModel() ) { + return false; + } + + return $this->getNativeData() === $that->getNativeData(); + } + + /** + * Returns a list of DataUpdate objects for recording information about this + * Content in some secondary data store. + * + * This default implementation returns a LinksUpdate object and calls the + * SecondaryDataUpdates hook. + * + * Subclasses may override this to determine the secondary data updates more + * efficiently, preferably without the need to generate a parser output object. + * They should however make sure to call SecondaryDataUpdates to give extensions + * a chance to inject additional updates. 
+ * + * @since 1.21 + * + * @param Title $title + * @param Content $old + * @param bool $recursive + * @param ParserOutput $parserOutput + * + * @return DataUpdate[] + * + * @see Content::getSecondaryDataUpdates() + */ + public function getSecondaryDataUpdates( Title $title, Content $old = null, + $recursive = true, ParserOutput $parserOutput = null + ) { + if ( $parserOutput === null ) { + $parserOutput = $this->getParserOutput( $title, null, null, false ); + } + + $updates = [ + new LinksUpdate( $title, $parserOutput, $recursive ) + ]; + + Hooks::run( 'SecondaryDataUpdates', [ $title, $old, $recursive, $parserOutput, &$updates ] ); + + return $updates; + } + + /** + * @since 1.21 + * + * @return Title[]|null + * + * @see Content::getRedirectChain + */ + public function getRedirectChain() { + global $wgMaxRedirects; + $title = $this->getRedirectTarget(); + if ( is_null( $title ) ) { + return null; + } + // recursive check to follow double redirects + $recurse = $wgMaxRedirects; + $titles = [ $title ]; + while ( --$recurse > 0 ) { + if ( $title->isRedirect() ) { + $page = WikiPage::factory( $title ); + $newtitle = $page->getRedirectTarget(); + } else { + break; + } + // Redirects to some special pages are not permitted + if ( $newtitle instanceof Title && $newtitle->isValidRedirectTarget() ) { + // The new title passes the checks, so make that our current + // title so that further recursion can be checked + $title = $newtitle; + $titles[] = $newtitle; + } else { + break; + } + } + + return $titles; + } + + /** + * Subclasses that implement redirects should override this. + * + * @since 1.21 + * + * @return Title|null + * + * @see Content::getRedirectTarget + */ + public function getRedirectTarget() { + return null; + } + + /** + * @note Migrated here from Title::newFromRedirectRecurse. 
+ * + * @since 1.21 + * + * @return Title|null + * + * @see Content::getUltimateRedirectTarget + */ + public function getUltimateRedirectTarget() { + $titles = $this->getRedirectChain(); + + return $titles ? array_pop( $titles ) : null; + } + + /** + * @since 1.21 + * + * @return bool + * + * @see Content::isRedirect + */ + public function isRedirect() { + return $this->getRedirectTarget() !== null; + } + + /** + * This default implementation always returns $this. + * Subclasses that implement redirects should override this. + * + * @since 1.21 + * + * @param Title $target + * + * @return Content $this + * + * @see Content::updateRedirect + */ + public function updateRedirect( Title $target ) { + return $this; + } + + /** + * @since 1.21 + * + * @param string|int $sectionId + * @return null + * + * @see Content::getSection + */ + public function getSection( $sectionId ) { + return null; + } + + /** + * @since 1.21 + * + * @param string|int|null|bool $sectionId + * @param Content $with + * @param string $sectionTitle + * @return null + * + * @see Content::replaceSection + */ + public function replaceSection( $sectionId, Content $with, $sectionTitle = '' ) { + return null; + } + + /** + * @since 1.21 + * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * @return Content $this + * + * @see Content::preSaveTransform + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + return $this; + } + + /** + * @since 1.21 + * + * @param string $header + * @return Content $this + * + * @see Content::addSectionHeader + */ + public function addSectionHeader( $header ) { + return $this; + } + + /** + * @since 1.21 + * + * @param Title $title + * @param ParserOptions $popts + * @param array $params + * @return Content $this + * + * @see Content::preloadTransform + */ + public function preloadTransform( Title $title, ParserOptions $popts, $params = [] ) { + return $this; + } + + /** + * @since 1.21 + * + * @param 
WikiPage $page + * @param int $flags + * @param int $parentRevId + * @param User $user + * @return Status + * + * @see Content::prepareSave + */ + public function prepareSave( WikiPage $page, $flags, $parentRevId, User $user ) { + if ( $this->isValid() ) { + return Status::newGood(); + } else { + return Status::newFatal( "invalid-content-data" ); + } + } + + /** + * @since 1.21 + * + * @param WikiPage $page + * @param ParserOutput|null $parserOutput + * + * @return DeferrableUpdate[] + * + * @see Content::getDeletionUpdates + */ + public function getDeletionUpdates( WikiPage $page, ParserOutput $parserOutput = null ) { + return [ + new LinksDeletionUpdate( $page ), + ]; + } + + /** + * This default implementation always returns false. Subclasses may override + * this to supply matching logic. + * + * @since 1.21 + * + * @param MagicWord $word + * + * @return bool Always false. + * + * @see Content::matchMagicWord + */ + public function matchMagicWord( MagicWord $word ) { + return false; + } + + /** + * This base implementation calls the hook ConvertContent to enable custom conversions. + * Subclasses may override this to implement conversion for "their" content model. + * + * @param string $toModel + * @param string $lossy + * + * @return Content|bool + * + * @see Content::convert() + */ + public function convert( $toModel, $lossy = '' ) { + if ( $this->getModel() === $toModel ) { + // nothing to do, shorten out. + return $this; + } + + $lossy = ( $lossy === 'lossy' ); // string flag, convert to boolean for convenience + $result = false; + + Hooks::run( 'ConvertContent', [ $this, $toModel, $lossy, &$result ] ); + + return $result; + } + + /** + * Returns a ParserOutput object containing information derived from this content. + * Most importantly, unless $generateHtml was false, the return value contains an + * HTML representation of the content. 
+ * + * Subclasses that want to control the parser output may override this, but it is + * preferred to override fillParserOutput() instead. + * + * Subclasses that override getParserOutput() itself should take care to call the + * ContentGetParserOutput hook. + * + * @since 1.24 + * + * @param Title $title Context title for parsing + * @param int|null $revId Revision ID (for {{REVISIONID}}) + * @param ParserOptions|null $options + * @param bool $generateHtml Whether or not to generate HTML + * + * @return ParserOutput Containing information derived from this content. + */ + public function getParserOutput( Title $title, $revId = null, + ParserOptions $options = null, $generateHtml = true + ) { + if ( $options === null ) { + $options = $this->getContentHandler()->makeParserOptions( 'canonical' ); + } + + $po = new ParserOutput(); + + if ( Hooks::run( 'ContentGetParserOutput', + [ $this, $title, $revId, $options, $generateHtml, &$po ] ) + ) { + // Save and restore the old value, just in case something is reusing + // the ParserOptions object in some weird way. + $oldRedir = $options->getRedirectTarget(); + $options->setRedirectTarget( $this->getRedirectTarget() ); + $this->fillParserOutput( $title, $revId, $options, $generateHtml, $po ); + $options->setRedirectTarget( $oldRedir ); + } + + Hooks::run( 'ContentAlterParserOutput', [ $this, $title, $po ] ); + + return $po; + } + + /** + * Fills the provided ParserOutput with information derived from the content. + * Unless $generateHtml was false, this includes an HTML representation of the content. + * + * This is called by getParserOutput() after consulting the ContentGetParserOutput hook. + * Subclasses are expected to override this method (or getParserOutput(), if need be). + * Subclasses of TextContent should generally override getHtml() instead. + * + * This placeholder implementation always throws an exception. 
+ * + * @since 1.24 + * + * @param Title $title Context title for parsing + * @param int|null $revId Revision ID (for {{REVISIONID}}) + * @param ParserOptions $options + * @param bool $generateHtml Whether or not to generate HTML + * @param ParserOutput &$output The output object to fill (reference). + * + * @throws MWException + */ + protected function fillParserOutput( Title $title, $revId, + ParserOptions $options, $generateHtml, ParserOutput &$output + ) { + // Don't make abstract, so subclasses that override getParserOutput() directly don't fail. + throw new MWException( 'Subclasses of AbstractContent must override fillParserOutput!' ); + } +} diff --git a/www/wiki/includes/content/CodeContentHandler.php b/www/wiki/includes/content/CodeContentHandler.php new file mode 100644 index 00000000..dfd46c8f --- /dev/null +++ b/www/wiki/includes/content/CodeContentHandler.php @@ -0,0 +1,67 @@ +<?php +/** + * Content handler for the pages with code, such as CSS, JavaScript, JSON. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Content + */ + +/** + * Content handler for code content such as CSS, JavaScript, JSON, etc + * @since 1.24 + * @ingroup Content + */ +abstract class CodeContentHandler extends TextContentHandler { + + /** + * Returns the English language, because code is English, and should be handled as such. + * + * @param Title $title + * @param Content $content + * + * @return Language + * + * @see ContentHandler::getPageLanguage() + */ + public function getPageLanguage( Title $title, Content $content = null ) { + return Language::factory( 'en' ); + } + + /** + * Returns the English language, because code is English, and should be handled as such. + * + * @param Title $title + * @param Content $content + * + * @return Language + * + * @see ContentHandler::getPageViewLanguage() + */ + public function getPageViewLanguage( Title $title, Content $content = null ) { + return Language::factory( 'en' ); + } + + /** + * @return string + * @throws MWException + */ + protected function getContentClass() { + throw new MWException( 'Subclass must override' ); + } + +} diff --git a/www/wiki/includes/content/Content.php b/www/wiki/includes/content/Content.php new file mode 100644 index 00000000..3e587828 --- /dev/null +++ b/www/wiki/includes/content/Content.php @@ -0,0 +1,526 @@ +<?php +/** + * A content object represents page content, e.g. the text to show on a page. + * Content objects have no knowledge about how they relate to wiki pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Base interface for content objects. + * + * @ingroup Content + */ +interface Content { + + /** + * @since 1.21 + * + * @return string A string representing the content in a way useful for + * building a full text search index. If no useful representation exists, + * this method returns an empty string. + * + * @todo Test that this actually works + * @todo Make sure this also works with LuceneSearch / WikiSearch + */ + public function getTextForSearchIndex(); + + /** + * @since 1.21 + * + * @return string|bool The wikitext to include when another page includes this + * content, or false if the content is not includable in a wikitext page. + * + * @todo Allow native handling, bypassing wikitext representation, like + * for includable special pages. + * @todo Allow transclusion into other content models than Wikitext! + * @todo Used in WikiPage and MessageCache to get message text. Not so + * nice. What should we use instead?! + */ + public function getWikitextForTransclusion(); + + /** + * Returns a textual representation of the content suitable for use in edit + * summaries and log messages. + * + * @since 1.21 + * + * @param int $maxLength Maximum length of the summary text. + * + * @return string The summary text. + */ + public function getTextForSummary( $maxLength = 250 ); + + /** + * Returns native representation of the data. Interpretation depends on + * the data model used, as given by getModel(). + * + * @since 1.21 + * + * @return mixed The native representation of the content. 
Could be a + * string, a nested array structure, an object, a binary blob... + * anything, really. + * + * @note Caller must be aware of content model! + */ + public function getNativeData(); + + /** + * Returns the content's nominal size in "bogo-bytes". + * + * @return int + */ + public function getSize(); + + /** + * Returns the ID of the content model used by this Content object. + * Corresponds to the CONTENT_MODEL_XXX constants. + * + * @since 1.21 + * + * @return string The model id + */ + public function getModel(); + + /** + * Convenience method that returns the ContentHandler singleton for handling + * the content model that this Content object uses. + * + * Shorthand for ContentHandler::getForContent( $this ) + * + * @since 1.21 + * + * @return ContentHandler + */ + public function getContentHandler(); + + /** + * Convenience method that returns the default serialization format for the + * content model that this Content object uses. + * + * Shorthand for $this->getContentHandler()->getDefaultFormat() + * + * @since 1.21 + * + * @return string + */ + public function getDefaultFormat(); + + /** + * Convenience method that returns the list of serialization formats + * supported for the content model that this Content object uses. + * + * Shorthand for $this->getContentHandler()->getSupportedFormats() + * + * @since 1.21 + * + * @return string[] List of supported serialization formats + */ + public function getSupportedFormats(); + + /** + * Returns true if $format is a supported serialization format for this + * Content object, false if it isn't. + * + * Note that this should always return true if $format is null, because null + * stands for the default serialization. + * + * Shorthand for $this->getContentHandler()->isSupportedFormat( $format ) + * + * @since 1.21 + * + * @param string $format The serialization format to check. 
+ * + * @return bool Whether the format is supported + */ + public function isSupportedFormat( $format ); + + /** + * Convenience method for serializing this Content object. + * + * Shorthand for $this->getContentHandler()->serializeContent( $this, $format ) + * + * @since 1.21 + * + * @param string $format The desired serialization format, or null for the default format. + * + * @return string Serialized form of this Content object. + */ + public function serialize( $format = null ); + + /** + * Returns true if this Content object represents empty content. + * + * @since 1.21 + * + * @return bool Whether this Content object is empty + */ + public function isEmpty(); + + /** + * Returns whether the content is valid. This is intended for local validity + * checks, not considering global consistency. + * + * Content needs to be valid before it can be saved. + * + * This default implementation always returns true. + * + * @since 1.21 + * + * @return bool + */ + public function isValid(); + + /** + * Returns true if this Content object is conceptually equivalent to the + * given Content object. + * + * Contract: + * + * - Will return false if $that is null. + * - Will return true if $that === $this. + * - Will return false if $that->getModel() != $this->getModel(). + * - Will return false if $that->getNativeData() is not equal to $this->getNativeData(), + * where the meaning of "equal" depends on the actual data model. + * + * Implementations should be careful to make equals() symmetric and transitive: + * + * - $a->equals( $b ) <=> $b->equals( $a ) + * - $a->equals( $b ) && $b->equals( $c ) ==> $a->equals( $c ) + * + * @since 1.21 + * + * @param Content $that The Content object to compare to. + * + * @return bool True if this Content object is equal to $that, false otherwise. + */ + public function equals( Content $that = null ); + + /** + * Return a copy of this Content object. 
The following must be true for the + * object returned: + * + * if $copy = $original->copy() + * + * - get_class($original) === get_class($copy) + * - $original->getModel() === $copy->getModel() + * - $original->equals( $copy ) + * + * If and only if the Content object is immutable, the copy() method can and + * should return $this. That is, $copy === $original may be true, but only + * for immutable content objects. + * + * @since 1.21 + * + * @return Content A copy of this object + */ + public function copy(); + + /** + * Returns true if this content is countable as a "real" wiki page, provided + * that it's also in a countable location (e.g. a current revision in the + * main namespace). + * + * @since 1.21 + * + * @param bool|null $hasLinks If it is known whether this content contains + * links, provide this information here, to avoid redundant parsing to + * find out. + * + * @return bool + */ + public function isCountable( $hasLinks = null ); + + /** + * Parse the Content object and generate a ParserOutput from the result. + * $result->getText() can be used to obtain the generated HTML. If no HTML + * is needed, $generateHtml can be set to false; in that case, + * $result->getText() may return null. + * + * @note To control which options are used in the cache key for the + * generated parser output, implementations of this method + * may call ParserOutput::recordOption() on the output object. + * + * @param Title $title The page title to use as a context for rendering. + * @param int $revId Optional revision ID being rendered. + * @param ParserOptions $options Any parser options. + * @param bool $generateHtml Whether to generate HTML (default: true). If false, + * the result of calling getText() on the ParserOutput object returned by + * this method is undefined. 
+ * + * @since 1.21 + * + * @return ParserOutput + */ + public function getParserOutput( Title $title, $revId = null, + ParserOptions $options = null, $generateHtml = true ); + + // TODO: make RenderOutput and RenderOptions base classes + + /** + * Returns a list of DataUpdate objects for recording information about this + * Content in some secondary data store. If the optional second argument, + * $old, is given, the updates may model only the changes that need to be + * made to replace information about the old content with information about + * the new content. + * + * The default implementation in AbstractContent calls + * $this->getParserOutput( $title, null, null, false ) when no ParserOutput + * is supplied, wraps the result in a LinksUpdate, and then runs the + * SecondaryDataUpdates hook. + * + * Subclasses may implement this to determine the necessary updates more + * efficiently, or make use of information about the old content. + * + * @note Implementations should call the SecondaryDataUpdates hook, like + * AbstractContent does. + * + * @param Title $title The context for determining the necessary updates + * @param Content $old An optional Content object representing the + * previous content, i.e. the content being replaced by this Content + * object. + * @param bool $recursive Whether to include recursive updates (default: + * true). + * @param ParserOutput $parserOutput Optional ParserOutput object. + * Provide if you have one handy, to avoid re-parsing of the content. + * + * @return DataUpdate[] A list of DataUpdate objects for putting information + * about this content object somewhere. + * + * @since 1.21 + */ + public function getSecondaryDataUpdates( Title $title, Content $old = null, + $recursive = true, ParserOutput $parserOutput = null ); + + /** + * Construct the redirect destination from this content and return an + * array of Titles, or null if this content doesn't represent a redirect. 
+ * The last element in the array is the final destination after all redirects + * have been resolved (up to $wgMaxRedirects times). + * + * @since 1.21 + * + * @return Title[]|null List of Titles, with the destination last. + */ + public function getRedirectChain(); + + /** + * Construct the redirect destination from this content and return a Title, + * or null if this content doesn't represent a redirect. + * This will only return the immediate redirect target, useful for + * the redirect table and other checks that don't need full recursion. + * + * @since 1.21 + * + * @return Title|null The corresponding Title. + */ + public function getRedirectTarget(); + + /** + * Construct the redirect destination from this content and return the + * Title, or null if this content doesn't represent a redirect. + * + * This will recurse down $wgMaxRedirects times or until a non-redirect + * target is hit in order to provide (hopefully) the Title of the final + * destination instead of another redirect. + * + * There is usually no need to override the default behavior, subclasses that + * want to implement redirects should override getRedirectTarget(). + * + * @since 1.21 + * + * @return Title|null + */ + public function getUltimateRedirectTarget(); + + /** + * Returns whether this Content represents a redirect. + * Shorthand for getRedirectTarget() !== null. + * + * @since 1.21 + * + * @return bool + */ + public function isRedirect(); + + /** + * If this Content object is a redirect, this method updates the redirect target. + * Otherwise, it does nothing. + * + * @since 1.21 + * + * @param Title $target The new redirect target + * + * @return Content A new Content object with the updated redirect (or $this + * if this Content object isn't a redirect) + */ + public function updateRedirect( Title $target ); + + /** + * Returns the section with the given ID. + * + * @since 1.21 + * + * @param string|int $sectionId Section identifier as a number or string + * (e.g. 
0, 1 or 'T-1'). The ID "0" retrieves the section before the first heading, "1" the + * text between the first heading (included) and the second heading (excluded), etc. + * + * @return Content|bool|null The section, or false if no such section + * exists, or null if sections are not supported. + */ + public function getSection( $sectionId ); + + /** + * Replaces a section of the content and returns a Content object with the + * section replaced. + * + * @since 1.21 + * + * @param string|int|null|bool $sectionId Section identifier as a number or string + * (e.g. 0, 1 or 'T-1'), null/false or an empty string for the whole page + * or 'new' for a new section. + * @param Content $with New content of the section + * @param string $sectionTitle New section's subject, only if $sectionId is 'new' + * + * @return Content|null The complete content with the section replaced, or null if error + */ + public function replaceSection( $sectionId, Content $with, $sectionTitle = '' ); + + /** + * Returns a Content object with pre-save transformations applied (or this + * object if no transformations apply). + * + * @since 1.21 + * + * @param Title $title + * @param User $user + * @param ParserOptions $parserOptions + * + * @return Content + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $parserOptions ); + + /** + * Returns a new WikitextContent object with the given section heading + * prepended, if supported. The default implementation just returns this + * Content object unmodified, ignoring the section header. + * + * @since 1.21 + * + * @param string $header + * + * @return Content + */ + public function addSectionHeader( $header ); + + /** + * Returns a Content object with preload transformations applied (or this + * object if no transformations apply). 
+ * + * @since 1.21 + * + * @param Title $title + * @param ParserOptions $parserOptions + * @param array $params + * + * @return Content + */ + public function preloadTransform( Title $title, ParserOptions $parserOptions, $params = [] ); + + /** + * Prepare Content for saving. Called before Content is saved by WikiPage::doEditContent() and in + * similar places. + * + * This may be used to check the content's consistency with global state. This function should + * NOT write any information to the database. + * + * Note that this method will usually be called inside the same transaction + * bracket that will be used to save the new revision. + * + * Note that this method is called before any update to the page table is + * performed. This means that $page may not yet know a page ID. + * + * @since 1.21 + * + * @param WikiPage $page The page to be saved. + * @param int $flags Bitfield for use with EDIT_XXX constants, see WikiPage::doEditContent() + * @param int $parentRevId The ID of the current revision + * @param User $user + * + * @return Status A status object indicating whether the content was + * successfully prepared for saving. If the returned status indicates + * an error, a rollback will be performed and the transaction aborted. + * + * @see WikiPage::doEditContent() + */ + public function prepareSave( WikiPage $page, $flags, $parentRevId, User $user ); + + /** + * Returns a list of updates to perform when this content is deleted. + * The necessary updates may be taken from the Content object, or depend on + * the current state of the database. + * + * @since 1.21 + * + * @param WikiPage $page The deleted page + * @param ParserOutput|null $parserOutput Optional parser output object + * for efficient access to meta-information about the content object. + * Provide if you have one handy. + * + * @return DeferrableUpdate[] A list of DeferrableUpdate instances that will clean up the + * database after deletion. 
+ */ + public function getDeletionUpdates( WikiPage $page, + ParserOutput $parserOutput = null ); + + /** + * Returns true if this Content object matches the given magic word. + * + * @since 1.21 + * + * @param MagicWord $word The magic word to match + * + * @return bool Whether this Content object matches the given magic word. + */ + public function matchMagicWord( MagicWord $word ); + + /** + * Converts this content object into another content object with the given content model, + * if that is possible. + * + * @param string $toModel The desired content model, use the CONTENT_MODEL_XXX flags. + * @param string $lossy Optional flag, set to "lossy" to allow lossy conversion. If lossy + * conversion is not allowed, full round-trip conversion is expected to work without losing + * information. + * + * @return Content|bool A content object with the content model $toModel, or false if + * that conversion is not supported. + */ + public function convert( $toModel, $lossy = '' ); + // @todo ImagePage and CategoryPage interfere with per-content action handlers + // @todo nice&sane integration of GeSHi syntax highlighting + // [11:59] <vvv> Hooks are ugly; make CodeHighlighter interface and a + // config to set the class which handles syntax highlighting + // [12:00] <vvv> And default it to a DummyHighlighter + +} diff --git a/www/wiki/includes/content/ContentHandler.php b/www/wiki/includes/content/ContentHandler.php new file mode 100644 index 00000000..3cfac8f9 --- /dev/null +++ b/www/wiki/includes/content/ContentHandler.php @@ -0,0 +1,1332 @@ +<?php + +use MediaWiki\Search\ParserOutputSearchDataExtractor; + +/** + * Base class for content handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ +/** + * A content handler knows how do deal with a specific type of content on a wiki + * page. Content is stored in the database in a serialized form (using a + * serialization format a.k.a. MIME type) and is unserialized into its native + * PHP representation (the content model), which is wrapped in an instance of + * the appropriate subclass of Content. + * + * ContentHandler instances are stateless singletons that serve, among other + * things, as a factory for Content objects. Generally, there is one subclass + * of ContentHandler and one subclass of Content for every type of content model. + * + * Some content types have a flat model, that is, their native representation + * is the same as their serialized form. Examples would be JavaScript and CSS + * code. As of now, this also applies to wikitext (MediaWiki's default content + * type), but wikitext content may be represented by a DOM or AST structure in + * the future. + * + * @ingroup Content + */ +abstract class ContentHandler { + /** + * Convenience function for getting flat text from a Content object. This + * should only be used in the context of backwards compatibility with code + * that is not yet able to handle Content objects! + * + * If $content is null, this method returns the empty string. 
/**
 * Backwards-compatibility helper that reduces a Content object to flat text.
 * Only meant for code that cannot yet work with Content objects.
 *
 * Resolution order:
 * - null content yields the empty string;
 * - a TextContent yields $content->getNativeData();
 * - anything else is reported on the 'ContentHandler' debug log channel and
 *   then handled according to the global $wgContentHandlerTextFallback:
 *   'fail' throws, 'serialize' returns $content->serialize(), and any other
 *   value makes this method return null.
 *
 * @since 1.21
 *
 * @param Content|null $content
 *
 * @throws MWException If the content is not an instance of TextContent and
 *   $wgContentHandlerTextFallback is set to 'fail'.
 * @return string|null Textual form of the content, if available.
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	if ( $content === null ) {
		return '';
	}

	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	$model = $content->getModel();
	wfDebugLog( 'ContentHandler', 'Accessing ' . $model . ' content as text!' );

	// switch compares loosely, which is the comparison wanted for this setting.
	switch ( $wgContentHandlerTextFallback ) {
		case 'fail':
			throw new MWException(
				"Attempt to get text from Content with model " . $model
			);
		case 'serialize':
			return $content->serialize();
		default:
			return null;
	}
}
/**
 * Build a Content object from its textual (serialized) representation.
 *
 * The text is handed to the ContentHandler for $modelId, or for
 * $title->getContentModel() when no model ID is given, and unserialized
 * using $format.
 *
 * @since 1.21
 *
 * @param string $text The textual representation to unserialize.
 * @param Title|null $title The title of the page this text belongs to.
 *   Required if $modelId is not provided.
 * @param string|null $modelId The model to deserialize to. If not provided,
 *   $title->getContentModel() is used.
 * @param string|null $format The format to use for deserialization; passed
 *   through unchanged to the handler's unserializeContent().
 *
 * @throws MWException If neither a title nor a model ID is given. Handler
 *   lookup and unserialization may throw as well.
 * @return Content A Content object representing the text.
 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	if ( $modelId === null ) {
		if ( $title === null ) {
			throw new MWException( "Must provide a Title object or a content model ID." );
		}

		$modelId = $title->getContentModel();
	}

	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
/**
 * Work out the name of the default content model for the page with the
 * given title.
 *
 * Decision steps, as implemented below:
 * - Start from MWNamespace::getNamespaceContentModel() for the namespace.
 * - Run the ContentHandlerDefaultModelFor hook; if a handler aborts the hook
 *   and a model is set, that model is returned immediately.
 * - Titles in NS_MEDIAWIKI ending in .css, .js or .json, and NS_USER
 *   subpages ending in .js, .css or .json, get the matching code model.
 * - Otherwise a configured non-wikitext model is returned as is, and
 *   everything else is wikitext.
 *
 * Note: this is used by, and must therefore not use, Title::getContentModel().
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return string Default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: never reach $title->getContentModel() from here, directly or
	// indirectly; that value is initialized from this method's result.
	$namespace = $title->getNamespace();
	$model = MWNamespace::getNamespaceContentModel( $namespace );

	// A hook handler that aborts processing and leaves a model set decides.
	$hookCompleted = Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] );
	if ( !$hookCompleted && $model !== null ) {
		return $model;
	}

	// Recognise code pages from the title: any MediaWiki-namespace page with
	// a code extension, or a user *subpage* (note the required slash).
	$ext = false;
	$matches = null;
	if ( NS_MEDIAWIKI == $namespace ) {
		if ( preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches ) ) {
			$ext = $matches[1];
		}
	} elseif ( NS_USER == $namespace ) {
		if ( preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches ) ) {
			$ext = $matches[1];
		}
	}

	$isCode = ( $ext !== false );
	$configuredAsWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT );

	if ( $configuredAsWikitext && !$isCode ) {
		return CONTENT_MODEL_WIKITEXT;
	}

	$modelByExtension = [
		'js' => CONTENT_MODEL_JAVASCRIPT,
		'css' => CONTENT_MODEL_CSS,
		'json' => CONTENT_MODEL_JSON,
	];
	if ( $isCode && isset( $modelByExtension[$ext] ) ) {
		return $modelByExtension[$ext];
	}

	return $model === null ? CONTENT_MODEL_TEXT : $model;
}

/**
 * Returns the ContentHandler singleton matching the content model of the
 * given title.
 *
 * @since 1.21
 *
 * @param Title $title
 *
 * @return ContentHandler
 */
public static function getForTitle( Title $title ) {
	return self::getForModelID( $title->getContentModel() );
}

/**
 * Returns the ContentHandler singleton matching the model of the given
 * Content object.
 *
 * @since 1.21
 *
 * @param Content $content
 *
 * @return ContentHandler
 */
public static function getForContent( Content $content ) {
	return self::getForModelID( $content->getModel() );
}

/**
 * @var array Cache of ContentHandler instances, keyed by model ID
 */
protected static $handlers;
/**
 * Returns the ContentHandler singleton for the given model ID. Use the
 * CONTENT_MODEL_XXX constants to identify the desired content model.
 *
 * Lookup order:
 * - An instance already cached in self::$handlers is returned as is.
 * - If the global $wgContentHandlers has a non-empty entry for the model,
 *   that entry is either a callable (invoked with the model ID) or a class
 *   name (instantiated with the model ID).
 * - Otherwise the ContentHandlerForModelID hook may supply the handler.
 *
 * The resulting instance is stored in self::$handlers; the
 * $wgContentHandlers array itself is not modified.
 *
 * @since 1.21
 *
 * @param string $modelId The ID of the content model for which to get a
 *   handler. Use CONTENT_MODEL_XXX constants.
 *
 * @throws MWException If the hook, callable or class produced something that
 *   is not a ContentHandler.
 * @throws MWUnknownContentModelException If no handler is known for the model ID.
 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	if ( isset( self::$handlers[$modelId] ) ) {
		return self::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		$handler = null;

		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		if ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} else {
			$handler = new $classOrCallback( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			// The configured value may be a Closure or an array callable, which
			// cannot be interpolated into a string; describe those by type so
			// the intended exception is the one that gets thrown.
			if ( is_string( $classOrCallback ) ) {
				$source = $classOrCallback;
			} elseif ( is_object( $classOrCallback ) ) {
				$source = get_class( $classOrCallback ) . ' callback';
			} else {
				$source = gettype( $classOrCallback ) . ' callback';
			}

			throw new MWException( "$source from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	self::$handlers[$modelId] = $handler;

	return self::$handlers[$modelId];
}

/**
 * Empties the cache of handler instances, so that subsequent lookups
 * create fresh handlers.
 */
public static function cleanupHandlersCache() {
	self::$handlers = [];
}
/**
 * Returns the localized name for a given content model.
 *
 * The name is taken from the system message "content-model-$name". If that
 * message does not exist, $name itself is returned unchanged.
 *
 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
 *   constant or returned by Revision::getContentModel().
 * @param Language|null $lang The language to parse the message in (since 1.26)
 *
 * @return string The content model's localized name.
 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$msg = wfMessage( "content-model-$name" );

	if ( $lang ) {
		$msg->inLanguage( $lang );
	}

	if ( !$msg->exists() ) {
		return $name;
	}

	return $msg->plain();
}

/**
 * Lists the known content model IDs: the keys of $wgContentHandlers, after
 * handlers of the GetContentModels hook had a chance to modify the list.
 *
 * @return string[]
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$modelIds = array_keys( $wgContentHandlers );
	Hooks::run( 'GetContentModels', [ &$modelIds ] );

	return $modelIds;
}

/**
 * Collects the serialization formats supported by the handlers of all
 * models configured in $wgContentHandlers, with duplicates removed.
 *
 * @return string[]
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$allFormats = [];

	foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
		$allFormats = array_merge(
			$allFormats,
			self::getForModelID( $modelId )->getSupportedFormats()
		);
	}

	return array_unique( $allFormats );
}

// ------------------------------------------------------------------------

/**
 * @var string ID of the content model this handler is responsible for
 */
protected $mModelID;

/**
 * @var string[] Serialization formats this handler supports
 */
protected $mSupportedFormats;

/**
 * Stores the model ID and the list of supported formats. Subclass
 * constructors typically pass both as literals.
 *
 * @param string $modelId (use CONTENT_MODEL_XXX constants).
 * @param string[] $formats List for supported serialization formats
 *   (typically as MIME types)
 */
public function __construct( $modelId, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
/**
 * Turns a Content object of the type supported by this ContentHandler into
 * its serialized string form.
 *
 * @since 1.21
 *
 * @param Content $content The Content object to serialize
 * @param string|null $format The desired serialization format
 *
 * @return string Serialized form of the content
 */
abstract public function serializeContent( Content $content, $format = null );

/**
 * Hook point for transforming a blob on export. This base implementation
 * hands the blob back untouched; subclasses may override it, for instance
 * to convert legacy formats or to filter internal meta-data.
 *
 * @param string $blob The blob to be exported
 * @param string|null $format The blob's serialization format
 *
 * @return string
 */
public function exportTransform( $blob, $format = null ) {
	return $blob;
}

/**
 * Turns a serialized blob back into a Content object of the type supported
 * by this ContentHandler.
 *
 * @since 1.21
 *
 * @param string $blob Serialized form of the content
 * @param string|null $format The format used for serialization
 *
 * @return Content The Content object created by deserializing $blob
 */
abstract public function unserializeContent( $blob, $format = null );

/**
 * Hook point for transforming a blob on import. This base implementation
 * hands the blob back untouched; subclasses may override it.
 *
 * @since 1.24
 *
 * @param string $blob
 * @param string|null $format
 *
 * @return string
 */
public function importTransform( $blob, $format = null ) {
	return $blob;
}

/**
 * Creates an empty Content object of the type supported by this
 * ContentHandler.
 *
 * @since 1.21
 *
 * @return Content
 */
abstract public function makeEmptyContent();
/**
 * Creates a new Content object that acts as a redirect to the given page,
 * or null if redirects are not supported by this content model.
 *
 * This base implementation does not support redirects and always returns
 * null. Subclasses that override it to return a Content object should also
 * override supportsRedirects() to return true.
 *
 * @since 1.21
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Text to include in the redirect, if possible.
 *
 * @return Content|null Always null in this implementation.
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	return null;
}

/**
 * Returns the ID of the content model this ContentHandler was constructed
 * for. Use with the CONTENT_MODEL_XXX constants.
 *
 * @since 1.21
 *
 * @return string The model ID
 */
public function getModelID() {
	return $this->mModelID;
}

/**
 * Guard that rejects any model ID other than this handler's own. The
 * comparison is strict.
 *
 * @since 1.21
 *
 * @param string $model_id The model to check
 *
 * @throws MWException If the model ID is not the ID of the content model supported by this
 *   ContentHandler.
 */
protected function checkModelID( $model_id ) {
	if ( $model_id === $this->mModelID ) {
		return;
	}

	$expected = $this->mModelID;

	throw new MWException( "Bad content model: expected $expected but got $model_id." );
}

/**
 * Returns the list of serialization formats that was passed to the
 * constructor, i.e. the formats serializeContent() and
 * unserializeContent() are expected to handle.
 *
 * @since 1.21
 *
 * @return string[] List of serialization formats as MIME type like strings
 */
public function getSupportedFormats() {
	return $this->mSupportedFormats;
}

/**
 * Returns the default serialization format, which in this implementation
 * is the first entry of the format list passed to the constructor.
 *
 * @since 1.21
 *
 * @return string The name of the default serialization format as a MIME type
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
/**
 * Tells whether $format is one of the serialization formats supported by
 * this ContentHandler.
 *
 * Any falsy $format (such as null) is accepted, because it means "use the
 * default format".
 *
 * @since 1.21
 *
 * @param string|null $format The serialization format to check
 *
 * @return bool
 */
public function isSupportedFormat( $format ) {
	// A falsy format stands for "use the default", which is always fine.
	return !$format || in_array( $format, $this->mSupportedFormats );
}

/**
 * Guard for validating a format received as a parameter.
 *
 * @param string|null $format The serialization format to check
 *
 * @throws MWException If the format is not supported by this content handler.
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException(
		"Format $format is not supported for content model " . $this->getModelID()
	);
}

/**
 * Returns overrides for action handlers.
 * Classes listed here will be used instead of the default one when
 * (and only when) $wgActions[$action] === true. This allows subclasses
 * to override the default action handlers.
 *
 * @since 1.21
 *
 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
 *   either the full qualified class name of an Action class, a callable taking ( Page $page,
 *   IContextSource $context = null ) as parameters and returning an Action object, or an actual
 *   Action object. This base implementation defines no overrides and returns an empty array.
 *
 * @see Action::factory
 */
public function getActionOverrides() {
	return [];
}
/**
 * Factory for creating an appropriate DifferenceEngine for this content model.
 *
 * Handlers of the GetDifferenceEngine hook run first. If one of them aborts
 * the hook, whatever it stored in the by-reference engine argument is
 * returned. Otherwise the class named by getDiffEngineClass() is
 * instantiated.
 *
 * @since 1.21
 *
 * @param IContextSource $context Context to use, anything else will be ignored.
 * @param int $old Revision ID we want to show and diff with.
 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
 *
 * @return DifferenceEngine
 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	$engineFromHook = null;
	$hookCompleted = Hooks::run(
		'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$engineFromHook ]
	);

	if ( $hookCompleted ) {
		// No handler took over: fall back to this model's own engine class.
		$engineClass = $this->getDiffEngineClass();

		return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}

	return $engineFromHook;
}
/**
 * Get the language in which the content of the given page is written.
 *
 * This base implementation starts from $wgContLang. For pages in the
 * MediaWiki namespace it instead uses the language code that
 * MessageCache::figureMessage() derives from the title text. Handlers of
 * the PageContentLanguage hook may then replace the result.
 *
 * Note that the page's language is not cacheable, since it may in some
 * cases depend on user settings.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy.
 *   Not used by this base implementation.
 *
 * @return Language The page's language
 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgContLang, $wgLang;

	$pageLang = $wgContLang;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language; the
		// language code is the second element of the returned pair.
		$messageInfo = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = Language::factory( $messageInfo[1] );
	}

	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}

/**
 * Get the language in which the content of this page is written when
 * viewed by the user.
 *
 * The result is getPageLanguage( $title, $content ), except that outside
 * the MediaWiki namespace a preferred variant differing from the page
 * language's own code replaces it with the Language for that variant.
 *
 * Note that the page's view language is not cacheable, since it depends on
 * user settings.
 *
 * @since 1.21
 *
 * @param Title $title The page to determine the language for.
 * @param Content|null $content The page's content, if you have it handy, to avoid reloading it.
 *
 * @return Language The page's language for viewing
 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();

	return $pageLang->getCode() !== $variant
		? Language::factory( $variant )
		: $pageLang;
}

/**
 * Determines whether the content type handled by this ContentHandler
 * can be used on the given page.
 *
 * This base implementation starts from "yes" and lets handlers of the
 * ContentModelCanBeUsedOn hook change the answer. Subclasses may override
 * it to restrict the model to specific locations, typically based on the
 * namespace or some other aspect of the title, such as a special suffix
 * (e.g. ".svg" for SVG content).
 *
 * @param Title $title The page's title.
 *
 * @return bool True if content of this kind can be used on the given page, false otherwise.
 */
public function canBeUsedOn( Title $title ) {
	$allowed = true;

	Hooks::run( 'ContentModelCanBeUsedOn', [ $this->getModelID(), $title, &$allowed ] );

	return $allowed;
}

/**
 * Returns the name of the diff engine class to use.
 *
 * @since 1.21
 *
 * @return string
 */
protected function getDiffEngineClass() {
	return DifferenceEngine::class;
}

/**
 * Attempts to merge differences between three versions. Returns a new
 * Content object for a clean merge and false for failure or a conflict.
 *
 * This base implementation does not attempt a merge and always returns
 * false.
 *
 * @since 1.21
 *
 * @param Content $oldContent The page's previous content.
 * @param Content $myContent One of the page's conflicting contents.
 * @param Content $yourContent One of the page's conflicting contents.
 *
 * @return Content|bool Always false.
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	return false;
}
/**
 * Classify an edit and return a string key for the type of change, if any.
 *
 * The checks run in this order and the first match wins: 'blank',
 * 'new-redirect', 'changed-redirect-target', 'removed-redirect',
 * 'newblank' / 'newpage', 'replace', 'contentmodelchange'.
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null String key representing type of change, or null.
 */
private function getChangeType(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$oldTarget = null;
	if ( $oldContent !== null ) {
		$oldTarget = $oldContent->getRedirectTarget();
	}

	$newTarget = null;
	if ( $newContent !== null ) {
		$newTarget = $newContent->getRedirectTarget();
	}

	// Blanking of a page: non-empty before, empty afterwards.
	if ( $oldContent && $oldContent->getSize() > 0 &&
		$newContent && $newContent->getSize() === 0
	) {
		return 'blank';
	}

	// Redirects
	if ( $newTarget ) {
		if ( !$oldTarget ) {
			// New redirect page (by creating new page or by changing content page)
			return 'new-redirect';
		}

		if ( !$newTarget->equals( $oldTarget ) ||
			$oldTarget->getFragment() !== $newTarget->getFragment()
		) {
			// Redirect target changed
			return 'changed-redirect-target';
		}
	} elseif ( $oldTarget ) {
		// Changing an existing redirect into a non-redirect
		return 'removed-redirect';
	}

	// New page created, blank or with content
	if ( ( $flags & EDIT_NEW ) && $newContent ) {
		return $newContent->getSize() === 0 ? 'newblank' : 'newpage';
	}

	if ( $oldContent && $newContent ) {
		// Removing more than 90% of the page
		if ( $oldContent->getSize() > 10 * $newContent->getSize() ) {
			return 'replace';
		}

		// Content model changed
		if ( $oldContent->getModel() !== $newContent->getModel() ) {
			return 'contentmodelchange';
		}
	}

	return null;
}
/**
 * Return an applicable auto-summary if one exists for the given edit.
 *
 * The kind of summary is chosen by getChangeType(). Where the summary
 * quotes page text, the text is truncated so that the message plus the
 * quoted text stays within a byte budget (250 for redirect summaries,
 * 200 otherwise, less the length of the message and of any target titles).
 *
 * @since 1.21
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string An appropriate auto-summary, or an empty string.
 */
public function getAutosummary(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable auto-summary for our case, so our auto-summary is empty.
	if ( !$changeType ) {
		return '';
	}

	// Decide what kind of auto-summary is needed.
	switch ( $changeType ) {
		case 'new-redirect':
			$newTarget = $newContent->getRedirectTarget();
			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autoredircomment', $newTarget->getFullText() )
				->plaintextParams( $truncatedtext )->inContentLanguage()->text();
		case 'changed-redirect-target':
			$oldTarget = $oldContent->getRedirectTarget();
			$newTarget = $newContent->getRedirectTarget();

			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMessage( 'autosumm-changed-redirect-target' )
					->inContentLanguage()->text() )
				- strlen( $oldTarget->getFullText() )
				- strlen( $newTarget->getFullText() )
			);

			return wfMessage( 'autosumm-changed-redirect-target',
					$oldTarget->getFullText(),
					$newTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'removed-redirect':
			$oldTarget = $oldContent->getRedirectTarget();
			$maxLength = 250
				- strlen( wfMessage( 'autosumm-removed-redirect' )
					->inContentLanguage()->text() )
				- strlen( $oldTarget->getFullText() );

			// This is the one change type that can be reported while
			// $newContent is null (old redirect, no new content), so there
			// may be no text to quote.
			$truncatedtext = $newContent !== null
				? $newContent->getTextForSummary( $maxLength )
				: '';

			return wfMessage( 'autosumm-removed-redirect', $oldTarget->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		case 'newpage':
			// If they're making a new article, give its text, truncated, in the summary.
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'blank':
			return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
		case 'replace':
			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

			return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
				->inContentLanguage()->text();
		case 'newblank':
			return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
		default:
			return '';
	}
}

/**
 * Return an applicable tag if one exists for the given edit or return null.
 *
 * @since 1.31
 *
 * @param Content|null $oldContent The previous text of the page.
 * @param Content|null $newContent The submitted text of the page.
 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
 *
 * @return string|null An appropriate tag, or null.
 */
public function getChangeTag(
	Content $oldContent = null,
	Content $newContent = null,
	$flags = 0
) {
	$changeType = $this->getChangeType( $oldContent, $newContent, $flags );

	// There's no applicable tag for this change.
	if ( !$changeType ) {
		return null;
	}

	// Core tags use the same keys as ones returned from $this->getChangeType()
	// but prefixed with pseudo namespace 'mw-', so we add the prefix before checking
	// if this type of change should be tagged
	$tag = 'mw-' . $changeType;

	// Not all change types are tagged, so we check against the list of defined tags.
	if ( in_array( $tag, ChangeTags::getSoftwareTags() ) ) {
		return $tag;
	}

	return null;
}

/**
 * Auto-generates a deletion reason
 *
 * The reason quotes the text of the latest revision or, when that revision
 * is blank and has a predecessor, the text of the predecessor. If every
 * row found among up to 20 revisions with visible user names has the same
 * user, that user is named as the only contributor.
 *
 * @since 1.21
 *
 * @param Title $title The page's title
 * @param bool &$hasHistory Set to whether the revision query returned more
 *   than one row. Left untouched when the method returns false early.
 *
 * @throws MWException If the revision used has a content model other than
 *   the one this handler supports (see checkModelID()).
 * @return mixed String containing deletion reason or empty string, or
 *   boolean false if no revision occurred
 *
 * @todo &$hasHistory is extremely ugly, it's here because
 * WikiPage::getAutoDeleteReason() and Article::generateReason()
 * have it / want it.
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Get the last revision
	$rev = Revision::newFromTitle( $title );

	if ( is_null( $rev ) ) {
		return false;
	}

	// Get the article's contents
	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy
	// revision involved
	if ( !$content || $content->isEmpty() ) {
		$prev = $rev->getPrevious();

		if ( $prev ) {
			$rev = $prev;
			$content = $rev->getContent();
			$blank = true;
		}
	}

	$this->checkModelID( $rev->getContentModel() );

	// Find out if there was only one contributor
	// Only scan the last 20 revisions
	$revQuery = Revision::getQueryInfo();
	$res = $dbr->select(
		$revQuery['tables'],
		[ 'rev_user_text' => $revQuery['fields']['rev_user_text'] ],
		[
			'rev_page' => $title->getArticleID(),
			$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
		],
		__METHOD__,
		[ 'LIMIT' => 20 ],
		$revQuery['joins']
	);

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$row = $dbr->fetchObject( $res );

	if ( $row ) { // $row is false if the only contributor is hidden
		$onlyAuthor = $row->rev_user_text;
		// Try to find a second contributor
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // T24999
				$onlyAuthor = false;
				break;
			}
		}
	} else {
		$onlyAuthor = false;
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The current revision was blank, so the text quoted below comes
		// from the revision before it.
		$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
	} elseif ( $onlyAuthor ) {
		$reason = wfMessage(
			'excontentauthor',
			'$1',
			$onlyAuthor
		)->inContentLanguage()->text();
	} else {
		$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$text = $content ? $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) ) : '';

	// Now replace the '$1' placeholder
	$reason = str_replace( '$1', $text, $reason );

	return $reason;
}
+ * + * @since 1.21 + * + * @param Revision $current The current text + * @param Revision $undo The revision to undo + * @param Revision $undoafter Must be an earlier revision than $undo + * + * @return mixed String on success, false on failure + */ + public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) { + $cur_content = $current->getContent(); + + if ( empty( $cur_content ) ) { + return false; // no page + } + + $undo_content = $undo->getContent(); + $undoafter_content = $undoafter->getContent(); + + if ( !$undo_content || !$undoafter_content ) { + return false; // no content to undo + } + + try { + $this->checkModelID( $cur_content->getModel() ); + $this->checkModelID( $undo_content->getModel() ); + if ( $current->getId() !== $undo->getId() ) { + // If we are undoing the most recent revision, + // its ok to revert content model changes. However + // if we are undoing a revision in the middle, then + // doing that will be confusing. + $this->checkModelID( $undoafter_content->getModel() ); + } + } catch ( MWException $e ) { + // If the revisions have different content models + // just return false + return false; + } + + if ( $cur_content->equals( $undo_content ) ) { + // No use doing a merge if it's just a straight revert. + return $undoafter_content; + } + + $undone_content = $this->merge3( $undo_content, $undoafter_content, $cur_content ); + + return $undone_content; + } + + /** + * Get parser options suitable for rendering and caching the article + * + * @param IContextSource|User|string $context One of the following: + * - IContextSource: Use the User and the Language of the provided + * context + * - User: Use the provided User object and $wgLang for the language, + * so use an IContextSource object if possible. + * - 'canonical': Canonical options (anonymous user with default + * preferences and content language). 
+ * + * @throws MWException + * @return ParserOptions + */ + public function makeParserOptions( $context ) { + global $wgContLang; + + if ( $context instanceof IContextSource ) { + $user = $context->getUser(); + $lang = $context->getLanguage(); + } elseif ( $context instanceof User ) { // settings per user (even anons) + $user = $context; + $lang = null; + } elseif ( $context === 'canonical' ) { // canonical settings + $user = new User; + $lang = $wgContLang; + } else { + throw new MWException( "Bad context for parser options: $context" ); + } + + return ParserOptions::newCanonical( $user, $lang ); + } + + /** + * Returns true for content models that support caching using the + * ParserCache mechanism. See WikiPage::shouldCheckParserCache(). + * + * @since 1.21 + * + * @return bool Always false. + */ + public function isParserCacheSupported() { + return false; + } + + /** + * Returns true if this content model supports sections. + * This default implementation returns false. + * + * Content models that return true here should also implement + * Content::getSection, Content::replaceSection, etc. to handle sections.. + * + * @return bool Always false. + */ + public function supportsSections() { + return false; + } + + /** + * Returns true if this content model supports categories. + * The default implementation returns true. + * + * @return bool Always true. + */ + public function supportsCategories() { + return true; + } + + /** + * Returns true if this content model supports redirects. + * This default implementation returns false. + * + * Content models that return true here should also implement + * ContentHandler::makeRedirectContent to return a Content object. + * + * @return bool Always false. + */ + public function supportsRedirects() { + return false; + } + + /** + * Return true if this content model supports direct editing, such as via EditPage. + * + * @return bool Default is false, and true for TextContent and it's derivatives. 
+ */ + public function supportsDirectEditing() { + return false; + } + + /** + * Whether or not this content model supports direct editing via ApiEditPage + * + * @return bool Default is false, and true for TextContent and derivatives. + */ + public function supportsDirectApiEditing() { + return $this->supportsDirectEditing(); + } + + /** + * Get fields definition for search index + * + * @todo Expose title, redirect, namespace, text, source_text, text_bytes + * field mappings here. (see T142670 and T143409) + * + * @param SearchEngine $engine + * @return SearchIndexField[] List of fields this content handler can provide. + * @since 1.28 + */ + public function getFieldsForSearchIndex( SearchEngine $engine ) { + $fields['category'] = $engine->makeSearchFieldMapping( + 'category', + SearchIndexField::INDEX_TYPE_TEXT + ); + $fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + + $fields['external_link'] = $engine->makeSearchFieldMapping( + 'external_link', + SearchIndexField::INDEX_TYPE_KEYWORD + ); + + $fields['outgoing_link'] = $engine->makeSearchFieldMapping( + 'outgoing_link', + SearchIndexField::INDEX_TYPE_KEYWORD + ); + + $fields['template'] = $engine->makeSearchFieldMapping( + 'template', + SearchIndexField::INDEX_TYPE_KEYWORD + ); + $fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + + $fields['content_model'] = $engine->makeSearchFieldMapping( + 'content_model', + SearchIndexField::INDEX_TYPE_KEYWORD + ); + + return $fields; + } + + /** + * Add new field definition to array. + * @param SearchIndexField[] &$fields + * @param SearchEngine $engine + * @param string $name + * @param int $type + * @return SearchIndexField[] new field defs + * @since 1.28 + */ + protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) { + $fields[$name] = $engine->makeSearchFieldMapping( $name, $type ); + return $fields; + } + + /** + * Return fields to be indexed by search engine + * as representation of this document. 
+ * Overriding class should call parent function or take care of calling + * the SearchDataForIndex hook. + * @param WikiPage $page Page to index + * @param ParserOutput $output + * @param SearchEngine $engine Search engine for which we are indexing + * @return array Map of name=>value for fields + * @since 1.28 + */ + public function getDataForSearchIndex( + WikiPage $page, + ParserOutput $output, + SearchEngine $engine + ) { + $fieldData = []; + $content = $page->getContent(); + + if ( $content ) { + $searchDataExtractor = new ParserOutputSearchDataExtractor(); + + $fieldData['category'] = $searchDataExtractor->getCategories( $output ); + $fieldData['external_link'] = $searchDataExtractor->getExternalLinks( $output ); + $fieldData['outgoing_link'] = $searchDataExtractor->getOutgoingLinks( $output ); + $fieldData['template'] = $searchDataExtractor->getTemplates( $output ); + + $text = $content->getTextForSearchIndex(); + + $fieldData['text'] = $text; + $fieldData['source_text'] = $text; + $fieldData['text_bytes'] = $content->getSize(); + $fieldData['content_model'] = $content->getModel(); + } + + Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] ); + return $fieldData; + } + + /** + * Produce page output suitable for indexing. + * + * Specific content handlers may override it if they need different content handling. 
+ * + * @param WikiPage $page + * @param ParserCache $cache + * @return ParserOutput + */ + public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) { + $parserOptions = $page->makeParserOptions( 'canonical' ); + $revId = $page->getRevision()->getId(); + if ( $cache ) { + $parserOutput = $cache->get( $page, $parserOptions ); + } + if ( empty( $parserOutput ) ) { + $parserOutput = + $page->getContent()->getParserOutput( $page->getTitle(), $revId, $parserOptions ); + if ( $cache ) { + $cache->save( $parserOutput, $page, $parserOptions ); + } + } + return $parserOutput; + } + +} diff --git a/www/wiki/includes/content/CssContent.php b/www/wiki/includes/content/CssContent.php new file mode 100644 index 00000000..b4f5196d --- /dev/null +++ b/www/wiki/includes/content/CssContent.php @@ -0,0 +1,121 @@ +<?php +/** + * Content object for CSS pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Content object for CSS pages. + * + * @ingroup Content + */ +class CssContent extends TextContent { + + /** + * @var bool|Title|null + */ + private $redirectTarget = false; + + /** + * @param string $text CSS code. 
+ * @param string $modelId the content content model + */ + public function __construct( $text, $modelId = CONTENT_MODEL_CSS ) { + parent::__construct( $text, $modelId ); + } + + /** + * Returns a Content object with pre-save transformations applied using + * Parser::preSaveTransform(). + * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * + * @return CssContent + * + * @see TextContent::preSaveTransform + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + global $wgParser; + // @todo Make pre-save transformation optional for script pages + + $text = $this->getNativeData(); + $pst = $wgParser->preSaveTransform( $text, $title, $user, $popts ); + + return new static( $pst ); + } + + /** + * @return string CSS wrapped in a <pre> tag. + */ + protected function getHtml() { + $html = ""; + $html .= "<pre class=\"mw-code mw-css\" dir=\"ltr\">\n"; + $html .= htmlspecialchars( $this->getNativeData() ); + $html .= "\n</pre>\n"; + + return $html; + } + + /** + * @param Title $target + * @return CssContent + */ + public function updateRedirect( Title $target ) { + if ( !$this->isRedirect() ) { + return $this; + } + + return $this->getContentHandler()->makeRedirectContent( $target ); + } + + /** + * @return Title|null + */ + public function getRedirectTarget() { + if ( $this->redirectTarget !== false ) { + return $this->redirectTarget; + } + $this->redirectTarget = null; + $text = $this->getNativeData(); + if ( strpos( $text, '/* #REDIRECT */' ) === 0 ) { + // Extract the title from the url + preg_match( '/title=(.*?)&action=raw/', $text, $matches ); + if ( isset( $matches[1] ) ) { + $title = Title::newFromText( $matches[1] ); + if ( $title ) { + // Have a title, check that the current content equals what + // the redirect content should be + if ( $this->equals( $this->getContentHandler()->makeRedirectContent( $title ) ) ) { + $this->redirectTarget = $title; + } + } + } + } + + return 
$this->redirectTarget; + } + +} diff --git a/www/wiki/includes/content/CssContentHandler.php b/www/wiki/includes/content/CssContentHandler.php new file mode 100644 index 00000000..9c110353 --- /dev/null +++ b/www/wiki/includes/content/CssContentHandler.php @@ -0,0 +1,61 @@ +<?php +/** + * Content handler for CSS pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Content + */ + +/** + * Content handler for CSS pages. 
+ * + * @since 1.21 + * @ingroup Content + */ +class CssContentHandler extends CodeContentHandler { + + /** + * @param string $modelId + */ + public function __construct( $modelId = CONTENT_MODEL_CSS ) { + parent::__construct( $modelId, [ CONTENT_FORMAT_CSS ] ); + } + + protected function getContentClass() { + return CssContent::class; + } + + public function supportsRedirects() { + return true; + } + + /** + * Create a redirect that is also valid CSS + * + * @param Title $destination + * @param string $text ignored + * @return CssContent + */ + public function makeRedirectContent( Title $destination, $text = '' ) { + // The parameters are passed as a string so the / is not url-encoded by wfArrayToCgi + $url = $destination->getFullURL( 'action=raw&ctype=text/css', false, PROTO_RELATIVE ); + $class = $this->getContentClass(); + return new $class( '/* #REDIRECT */@import ' . CSSMin::buildUrlValue( $url ) . ';' ); + } + +} diff --git a/www/wiki/includes/content/FileContentHandler.php b/www/wiki/includes/content/FileContentHandler.php new file mode 100644 index 00000000..3028dfda --- /dev/null +++ b/www/wiki/includes/content/FileContentHandler.php @@ -0,0 +1,65 @@ +<?php + +/** + * Content handler for File: files + * TODO: this handler s not used directly now, + * but instead manually called by WikitextHandler. + * This should be fixed in the future. 
+ */ +class FileContentHandler extends WikitextContentHandler { + + public function getFieldsForSearchIndex( SearchEngine $engine ) { + $fields['file_media_type'] = + $engine->makeSearchFieldMapping( 'file_media_type', SearchIndexField::INDEX_TYPE_KEYWORD ); + $fields['file_media_type']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + $fields['file_mime'] = + $engine->makeSearchFieldMapping( 'file_mime', SearchIndexField::INDEX_TYPE_SHORT_TEXT ); + $fields['file_mime']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + $fields['file_size'] = + $engine->makeSearchFieldMapping( 'file_size', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_width'] = + $engine->makeSearchFieldMapping( 'file_width', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_height'] = + $engine->makeSearchFieldMapping( 'file_height', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_bits'] = + $engine->makeSearchFieldMapping( 'file_bits', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_resolution'] = + $engine->makeSearchFieldMapping( 'file_resolution', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_text'] = + $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT ); + return $fields; + } + + public function getDataForSearchIndex( + WikiPage $page, + ParserOutput $parserOutput, + SearchEngine $engine + ) { + $fields = []; + + $title = $page->getTitle(); + if ( NS_FILE != $title->getNamespace() ) { + return []; + } + $file = wfLocalFile( $title ); + if ( !$file || !$file->exists() ) { + return []; + } + + $handler = $file->getHandler(); + if ( $handler ) { + $fields['file_text'] = $handler->getEntireText( $file ); + } + $fields['file_media_type'] = $file->getMediaType(); + $fields['file_mime'] = $file->getMimeType(); + $fields['file_size'] = $file->getSize(); + $fields['file_width'] = $file->getWidth(); + $fields['file_height'] = $file->getHeight(); + $fields['file_bits'] = $file->getBitDepth(); + $fields['file_resolution'] = + (int)floor( 
sqrt( $fields['file_width'] * $fields['file_height'] ) ); + + return $fields; + } + +} diff --git a/www/wiki/includes/content/JavaScriptContent.php b/www/wiki/includes/content/JavaScriptContent.php new file mode 100644 index 00000000..6d236560 --- /dev/null +++ b/www/wiki/includes/content/JavaScriptContent.php @@ -0,0 +1,123 @@ +<?php +/** + * Content for JavaScript pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Content for JavaScript pages. + * + * @ingroup Content + */ +class JavaScriptContent extends TextContent { + + /** + * @var bool|Title|null + */ + private $redirectTarget = false; + + /** + * @param string $text JavaScript code. + * @param string $modelId the content model name + */ + public function __construct( $text, $modelId = CONTENT_MODEL_JAVASCRIPT ) { + parent::__construct( $text, $modelId ); + } + + /** + * Returns a Content object with pre-save transformations applied using + * Parser::preSaveTransform(). 
+ * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * + * @return JavaScriptContent + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + global $wgParser; + // @todo Make pre-save transformation optional for script pages + // See bug #32858 + + $text = $this->getNativeData(); + $pst = $wgParser->preSaveTransform( $text, $title, $user, $popts ); + + return new static( $pst ); + } + + /** + * @return string JavaScript wrapped in a <pre> tag. + */ + protected function getHtml() { + $html = ""; + $html .= "<pre class=\"mw-code mw-js\" dir=\"ltr\">\n"; + $html .= htmlspecialchars( $this->getNativeData() ); + $html .= "\n</pre>\n"; + + return $html; + } + + /** + * If this page is a redirect, return the content + * if it should redirect to $target instead + * + * @param Title $target + * @return JavaScriptContent + */ + public function updateRedirect( Title $target ) { + if ( !$this->isRedirect() ) { + return $this; + } + + return $this->getContentHandler()->makeRedirectContent( $target ); + } + + /** + * @return Title|null + */ + public function getRedirectTarget() { + if ( $this->redirectTarget !== false ) { + return $this->redirectTarget; + } + $this->redirectTarget = null; + $text = $this->getNativeData(); + if ( strpos( $text, '/* #REDIRECT */' ) === 0 ) { + // Extract the title from the url + preg_match( '/title=(.*?)\\\\u0026action=raw/', $text, $matches ); + if ( isset( $matches[1] ) ) { + $title = Title::newFromText( $matches[1] ); + if ( $title ) { + // Have a title, check that the current content equals what + // the redirect content should be + if ( $this->equals( $this->getContentHandler()->makeRedirectContent( $title ) ) ) { + $this->redirectTarget = $title; + } + } + } + } + + return $this->redirectTarget; + } + +} diff --git a/www/wiki/includes/content/JavaScriptContentHandler.php b/www/wiki/includes/content/JavaScriptContentHandler.php new file mode 100644 index 00000000..9abad3e2 
--- /dev/null
+++ b/www/wiki/includes/content/JavaScriptContentHandler.php
@@ -0,0 +1,62 @@
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Content handler for JavaScript pages.
 *
 * @todo Create a ScriptContentHandler base class, do highlighting stuff there?
 *
 * @since 1.21
 * @ingroup Content
 */
class JavaScriptContentHandler extends CodeContentHandler {

	/**
	 * @param string $modelId
	 */
	public function __construct( $modelId = CONTENT_MODEL_JAVASCRIPT ) {
		parent::__construct( $modelId, [ CONTENT_FORMAT_JAVASCRIPT ] );
	}

	/**
	 * @return string
	 */
	protected function getContentClass() {
		return JavaScriptContent::class;
	}

	/**
	 * @return bool Always true; see makeRedirectContent()
	 */
	public function supportsRedirects() {
		return true;
	}

	/**
	 * Create a redirect that is also valid JavaScript
	 *
	 * The result is a redirect marker comment followed by an mw.loader.load()
	 * call for the destination's raw JavaScript URL.
	 *
	 * @param Title $destination
	 * @param string $text ignored
	 * @return JavaScriptContent
	 */
	public function makeRedirectContent( Title $destination, $text = '' ) {
		// The parameters are passed as a string so the / is not url-encoded by wfArrayToCgi
		$rawUrl = $destination->getFullURL( 'action=raw&ctype=text/javascript', false, PROTO_RELATIVE );
		$contentClass = $this->getContentClass();

		return new $contentClass(
			'/* #REDIRECT */' . Xml::encodeJsCall( 'mw.loader.load', [ $rawUrl ] )
		);
	}
}
diff --git a/www/wiki/includes/content/JsonContent.php b/www/wiki/includes/content/JsonContent.php
new file mode 100644
index 00000000..7d8f67ce
--- /dev/null
+++ b/www/wiki/includes/content/JsonContent.php
@@ -0,0 +1,251 @@
<?php
/**
 * JSON Content Model
 *
 * @file
 *
 * @author Ori Livneh <ori@wikimedia.org>
 * @author Kunal Mehta <legoktm@gmail.com>
 */

/**
 * Represents the content of a JSON content.
 * @since 1.24
 */
class JsonContent extends TextContent {

	/**
	 * Cached result of parsing the native data; filled by getData().
	 *
	 * @since 1.25
	 * @var Status
	 */
	protected $jsonParse;

	/**
	 * @param string $text JSON
	 * @param string $modelId
	 */
	public function __construct( $text, $modelId = CONTENT_MODEL_JSON ) {
		parent::__construct( $text, $modelId );
	}

	/**
	 * Decodes the JSON into a PHP associative array.
	 *
	 * @deprecated since 1.25 Use getData instead.
	 * @return array|null
	 */
	public function getJsonData() {
		wfDeprecated( __METHOD__, '1.25' );
		return FormatJson::decode( $this->getNativeData(), true );
	}

	/**
	 * Decodes the JSON string.
	 *
	 * Note that this parses it without casting objects to associative arrays.
	 * Objects and arrays are kept as distinguishable types in the PHP values.
	 *
	 * The parse result is computed once and then reused.
	 *
	 * @return Status
	 */
	public function getData() {
		if ( $this->jsonParse !== null ) {
			return $this->jsonParse;
		}

		$this->jsonParse = FormatJson::parse( $this->getNativeData() );
		return $this->jsonParse;
	}

	/**
	 * @return bool Whether content is valid.
	 */
	public function isValid() {
		return $this->getData()->isGood();
	}

	/**
	 * Pretty-print JSON.
	 *
	 * If called before validation, it may return JSON "null".
	 *
	 * @return string
	 */
	public function beautifyJSON() {
		$value = $this->getData()->getValue();
		return FormatJson::encode( $value, true, FormatJson::UTF8_OK );
	}

	/**
	 * Beautifies JSON prior to save.
+ * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * @return JsonContent + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + // FIXME: WikiPage::doEditContent invokes PST before validation. As such, native data + // may be invalid (though PST result is discarded later in that case). + if ( !$this->isValid() ) { + return $this; + } + + return new static( self::normalizeLineEndings( $this->beautifyJSON() ) ); + } + + /** + * Set the HTML and add the appropriate styles. + * + * @param Title $title + * @param int $revId + * @param ParserOptions $options + * @param bool $generateHtml + * @param ParserOutput &$output + */ + protected function fillParserOutput( Title $title, $revId, + ParserOptions $options, $generateHtml, ParserOutput &$output + ) { + // FIXME: WikiPage::doEditContent generates parser output before validation. + // As such, native data may be invalid (though output is discarded later in that case). + if ( $generateHtml && $this->isValid() ) { + $output->setText( $this->rootValueTable( $this->getData()->getValue() ) ); + $output->addModuleStyles( 'mediawiki.content.json' ); + } else { + $output->setText( '' ); + } + } + + /** + * Construct HTML table representation of any JSON value. + * + * See also valueCell, which is similar. + * + * @param mixed $val + * @return string HTML. + */ + protected function rootValueTable( $val ) { + if ( is_object( $val ) ) { + return $this->objectTable( $val ); + } + + if ( is_array( $val ) ) { + // Wrap arrays in another array so that they're visually boxed in a container. + // Otherwise they are visually indistinguishable from a single value. 
+ return $this->arrayTable( [ $val ] ); + } + + return Html::rawElement( 'table', [ 'class' => 'mw-json mw-json-single-value' ], + Html::rawElement( 'tbody', [], + Html::rawElement( 'tr', [], + Html::element( 'td', [], $this->primitiveValue( $val ) ) + ) + ) + ); + } + + /** + * Create HTML table representing a JSON object. + * + * @param stdClass $mapping + * @return string HTML + */ + protected function objectTable( $mapping ) { + $rows = []; + $empty = true; + + foreach ( $mapping as $key => $val ) { + $rows[] = $this->objectRow( $key, $val ); + $empty = false; + } + if ( $empty ) { + $rows[] = Html::rawElement( 'tr', [], + Html::element( 'td', [ 'class' => 'mw-json-empty' ], + wfMessage( 'content-json-empty-object' )->text() + ) + ); + } + return Html::rawElement( 'table', [ 'class' => 'mw-json' ], + Html::rawElement( 'tbody', [], implode( '', $rows ) ) + ); + } + + /** + * Create HTML table row representing one object property. + * + * @param string $key + * @param mixed $val + * @return string HTML. + */ + protected function objectRow( $key, $val ) { + $th = Html::element( 'th', [], $key ); + $td = $this->valueCell( $val ); + return Html::rawElement( 'tr', [], $th . $td ); + } + + /** + * Create HTML table representing a JSON array. + * + * @param array $mapping + * @return string HTML + */ + protected function arrayTable( $mapping ) { + $rows = []; + $empty = true; + + foreach ( $mapping as $val ) { + $rows[] = $this->arrayRow( $val ); + $empty = false; + } + if ( $empty ) { + $rows[] = Html::rawElement( 'tr', [], + Html::element( 'td', [ 'class' => 'mw-json-empty' ], + wfMessage( 'content-json-empty-array' )->text() + ) + ); + } + return Html::rawElement( 'table', [ 'class' => 'mw-json' ], + Html::rawElement( 'tbody', [], implode( "\n", $rows ) ) + ); + } + + /** + * Create HTML table row representing the value in an array. + * + * @param mixed $val + * @return string HTML. 
+ */ + protected function arrayRow( $val ) { + $td = $this->valueCell( $val ); + return Html::rawElement( 'tr', [], $td ); + } + + /** + * Construct HTML table cell representing any JSON value. + * + * @param mixed $val + * @return string HTML. + */ + protected function valueCell( $val ) { + if ( is_object( $val ) ) { + return Html::rawElement( 'td', [], $this->objectTable( $val ) ); + } + + if ( is_array( $val ) ) { + return Html::rawElement( 'td', [], $this->arrayTable( $val ) ); + } + + return Html::element( 'td', [ 'class' => 'value' ], $this->primitiveValue( $val ) ); + } + + /** + * Construct text representing a JSON primitive value. + * + * @param mixed $val + * @return string Text. + */ + protected function primitiveValue( $val ) { + if ( is_string( $val ) ) { + // Don't FormatJson::encode for strings since we want quotes + // and new lines to render visually instead of escaped. + return '"' . $val . '"'; + } + return FormatJson::encode( $val ); + } +} diff --git a/www/wiki/includes/content/JsonContentHandler.php b/www/wiki/includes/content/JsonContentHandler.php new file mode 100644 index 00000000..edb21f68 --- /dev/null +++ b/www/wiki/includes/content/JsonContentHandler.php @@ -0,0 +1,47 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Content handler for JSON. + * + * @author Ori Livneh <ori@wikimedia.org> + * @author Kunal Mehta <legoktm@gmail.com> + * + * @since 1.24 + * @ingroup Content + */ +class JsonContentHandler extends CodeContentHandler { + + public function __construct( $modelId = CONTENT_MODEL_JSON ) { + parent::__construct( $modelId, [ CONTENT_FORMAT_JSON ] ); + } + + /** + * @return string + */ + protected function getContentClass() { + return JsonContent::class; + } + + public function makeEmptyContent() { + $class = $this->getContentClass(); + return new $class( '{}' ); + } +} diff --git a/www/wiki/includes/content/MessageContent.php b/www/wiki/includes/content/MessageContent.php new file mode 100644 index 00000000..4b589893 --- /dev/null +++ b/www/wiki/includes/content/MessageContent.php @@ -0,0 +1,174 @@ +<?php +/** + * Wrapper content object allowing to handle a system message as a Content object. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Wrapper allowing us to handle a system message as a Content object. 
+ * Note that this is generally *not* used to represent content from the + * MediaWiki namespace, and that there is no MessageContentHandler. + * MessageContent is just intended as glue for wrapping a message programmatically. + * + * @ingroup Content + */ +class MessageContent extends AbstractContent { + + /** + * @var Message + */ + protected $mMessage; + + /** + * @param Message|string $msg A Message object, or a message key. + * @param string[] $params An optional array of message parameters. + */ + public function __construct( $msg, $params = null ) { + # XXX: messages may be wikitext, html or plain text! and maybe even something else entirely. + parent::__construct( CONTENT_MODEL_WIKITEXT ); + + if ( is_string( $msg ) ) { + $this->mMessage = wfMessage( $msg ); + } else { + $this->mMessage = clone $msg; + } + + if ( $params ) { + $this->mMessage = $this->mMessage->params( $params ); + } + } + + /** + * Fully parse the text from wikitext to HTML. + * + * @return string Parsed HTML. + */ + public function getHtml() { + return $this->mMessage->parse(); + } + + /** + * Returns the message text. {{-transformation is done. + * + * @return string Unescaped message text. + */ + public function getWikitext() { + return $this->mMessage->text(); + } + + /** + * Returns the message object, with any parameters already substituted. + * + * @return Message The message object. + */ + public function getNativeData() { + // NOTE: Message objects are mutable. Cloning here makes MessageContent immutable. + return clone $this->mMessage; + } + + /** + * @return string + * + * @see Content::getTextForSearchIndex + */ + public function getTextForSearchIndex() { + return $this->mMessage->plain(); + } + + /** + * @return string + * + * @see Content::getWikitextForTransclusion + */ + public function getWikitextForTransclusion() { + return $this->getWikitext(); + } + + /** + * @param int $maxlength Maximum length of the summary text, defaults to 250. 
+ * + * @return string The summary text. + * + * @see Content::getTextForSummary + */ + public function getTextForSummary( $maxlength = 250 ) { + return substr( $this->mMessage->plain(), 0, $maxlength ); + } + + /** + * @return int + * + * @see Content::getSize + */ + public function getSize() { + return strlen( $this->mMessage->plain() ); + } + + /** + * @return Content A copy of this object + * + * @see Content::copy + */ + public function copy() { + // MessageContent is immutable (because getNativeData() returns a clone of the Message object) + return $this; + } + + /** + * @param bool|null $hasLinks + * + * @return bool Always false. + * + * @see Content::isCountable + */ + public function isCountable( $hasLinks = null ) { + return false; + } + + /** + * @param Title $title Unused. + * @param int $revId Unused. + * @param ParserOptions $options Unused. + * @param bool $generateHtml Whether to generate HTML (default: true). + * + * @return ParserOutput + * + * @see Content::getParserOutput + */ + public function getParserOutput( Title $title, $revId = null, + ParserOptions $options = null, $generateHtml = true ) { + if ( $generateHtml ) { + $html = $this->getHtml(); + } else { + $html = ''; + } + + $po = new ParserOutput( $html ); + // Message objects are in the user language. + $po->recordOption( 'userlang' ); + + return $po; + } + +} diff --git a/www/wiki/includes/content/TextContent.php b/www/wiki/includes/content/TextContent.php new file mode 100644 index 00000000..71f65b37 --- /dev/null +++ b/www/wiki/includes/content/TextContent.php @@ -0,0 +1,330 @@ +<?php +/** + * Content object implementation for representing flat text. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Content object implementation for representing flat text. + * + * TextContent instances are immutable + * + * @ingroup Content + */ +class TextContent extends AbstractContent { + + /** + * @var string + */ + protected $mText; + + /** + * @param string $text + * @param string $model_id + * @throws MWException + */ + public function __construct( $text, $model_id = CONTENT_MODEL_TEXT ) { + parent::__construct( $model_id ); + + if ( $text === null || $text === false ) { + wfWarn( "TextContent constructed with \$text = " . var_export( $text, true ) . "! " + . "This may indicate an error in the caller's scope.", 2 ); + + $text = ''; + } + + if ( !is_string( $text ) ) { + throw new MWException( "TextContent expects a string in the constructor." ); + } + + $this->mText = $text; + } + + /** + * @note Mutable subclasses MUST override this to return a copy! + * + * @return Content $this + */ + public function copy() { + return $this; # NOTE: this is ok since TextContent are immutable. + } + + public function getTextForSummary( $maxlength = 250 ) { + global $wgContLang; + + $text = $this->getNativeData(); + + $truncatedtext = $wgContLang->truncate( + preg_replace( "/[\n\r]/", ' ', $text ), + max( 0, $maxlength ) ); + + return $truncatedtext; + } + + /** + * Returns the text's size in bytes. 
+ * + * @return int + */ + public function getSize() { + $text = $this->getNativeData(); + + return strlen( $text ); + } + + /** + * Returns true if this content is not a redirect, and $wgArticleCountMethod + * is "any". + * + * @param bool|null $hasLinks If it is known whether this content contains links, + * provide this information here, to avoid redundant parsing to find out. + * This implementation does not read it. + * + * @return bool + */ + public function isCountable( $hasLinks = null ) { + global $wgArticleCountMethod; + + if ( $this->isRedirect() ) { + return false; + } + + if ( $wgArticleCountMethod === 'any' ) { + return true; + } + + return false; + } + + /** + * Returns the text represented by this Content object, as a string. + * + * @return string The raw text. + */ + public function getNativeData() { + return $this->mText; + } + + /** + * Returns the text represented by this Content object, as a string. + * Delegates to getNativeData(). + * + * @return string The raw text. + */ + public function getTextForSearchIndex() { + return $this->getNativeData(); + } + + /** + * Attempts to convert this content object to wikitext (lossy conversion + * is allowed), and then returns the text string. + * + * @note this allows any text-based content to be transcluded as if it was wikitext. + * + * @return string|bool The raw text, or false if the conversion failed. + */ + public function getWikitextForTransclusion() { + $wikitext = $this->convert( CONTENT_MODEL_WIKITEXT, 'lossy' ); + + if ( $wikitext ) { + return $wikitext->getNativeData(); + } else { + return false; + } + } + + /** + * Do a "\r\n" -> "\n" and "\r" -> "\n" transformation + * as well as trim trailing whitespace + * + * This was formerly part of Parser::preSaveTransform, but + * for non-wikitext content models they probably still want + * to normalize line endings without all of the other PST + * changes. 
+ * + * @since 1.28 + * @param string $text + * @return string + */ + public static function normalizeLineEndings( $text ) { + return str_replace( [ "\r\n", "\r" ], "\n", rtrim( $text ) ); + } + + /** + * Returns a Content object with pre-save transformations applied. + * + * At a minimum, subclasses should make sure to call TextContent::normalizeLineEndings() + * either directly or as part of Parser::preSaveTransform(). + * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * + * @return Content $this if normalization left the text unchanged, otherwise a new + * object of the same class and content model wrapping the normalized text. + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + $text = $this->getNativeData(); + $pst = self::normalizeLineEndings( $text ); + + return ( $text === $pst ) ? $this : new static( $pst, $this->getModel() ); + } + + /** + * Diff this content object with another content object. + * + * @since 1.21 + * + * @param Content $that The other content object to compare this content object to. + * @param Language|null $lang The language object to use for text segmentation. + * If not given, $wgContLang is used. + * + * @throws MWException If $that does not have the same content model as this object. + * @return Diff A diff representing the changes that would have to be + * made to this content object to make it equal to $that. + */ + public function diff( Content $that, Language $lang = null ) { + global $wgContLang; + + $this->checkModelID( $that->getModel() ); + + // @todo could implement this in DifferenceEngine and just delegate here? + + if ( !$lang ) { + $lang = $wgContLang; + } + + $otext = $this->getNativeData(); + $ntext = $that->getNativeData(); + + # Note: Use native PHP diff, external engines don't give us abstract output + $ota = explode( "\n", $lang->segmentForDiff( $otext ) ); + $nta = explode( "\n", $lang->segmentForDiff( $ntext ) ); + + $diff = new Diff( $ota, $nta ); + + return $diff; + } + + /** + * Fills the provided ParserOutput object with information derived from the content. 
+ * Unless $generateHtml was false, this includes an HTML representation of the content + * provided by getHtml(). + * + * For content models listed in $wgTextModelsToParse, this method will call the MediaWiki + * wikitext parser on the text to extract any (wikitext) links, magic words, etc. + * + * Subclasses may override this to provide custom content processing. + * For custom HTML generation alone, it is sufficient to override getHtml(). + * + * @param Title $title Context title for parsing + * @param int $revId Revision ID (for {{REVISIONID}}) + * @param ParserOptions $options + * @param bool $generateHtml Whether or not to generate HTML + * @param ParserOutput &$output The output object to fill (reference). + */ + protected function fillParserOutput( Title $title, $revId, + ParserOptions $options, $generateHtml, ParserOutput &$output + ) { + global $wgParser, $wgTextModelsToParse; + + if ( in_array( $this->getModel(), $wgTextModelsToParse ) ) { + // parse just to get links etc into the database, HTML is replaced below. + $output = $wgParser->parse( $this->getNativeData(), $title, $options, true, true, $revId ); + } + + if ( $generateHtml ) { + $html = $this->getHtml(); + } else { + $html = ''; + } + + $output->setText( $html ); + } + + /** + * Generates an HTML version of the content, for display. Used by + * fillParserOutput() to provide HTML for the ParserOutput object. + * + * Subclasses may override this to provide a custom HTML rendering. + * If further information is to be derived from the content (such as + * categories), the fillParserOutput() method can be overridden instead. + * + * For backwards-compatibility, this default implementation just calls + * getHighlightHtml(). + * + * @return string An HTML representation of the content + */ + protected function getHtml() { + return $this->getHighlightHtml(); + } + + /** + * Generates an HTML version of the content, for display. 
+ * + * This default implementation returns an HTML-escaped version + * of the raw text content. + * + * @note The functionality of this method should really be implemented + * in getHtml(), and subclasses should override getHtml() if needed. + * getHighlightHtml() is kept around for backward compatibility with + * extensions that already override it. + * + * @deprecated since 1.24. Use getHtml() instead. In particular, subclasses overriding + * getHighlightHtml() should override getHtml() instead. + * + * @return string An HTML representation of the content + */ + protected function getHighlightHtml() { + return htmlspecialchars( $this->getNativeData() ); + } + + /** + * This implementation provides lossless conversion between content models based + * on TextContent. + * + * @param string $toModel The desired content model, use the CONTENT_MODEL_XXX flags. + * @param string $lossy Flag, set to "lossy" to allow lossy conversion. If lossy conversion is not + * allowed, full round-trip conversion is expected to work without losing information. + * + * @return Content|bool A content object with the content model $toModel, or false if that + * conversion is not supported. + * + * @see Content::convert() + */ + public function convert( $toModel, $lossy = '' ) { + $converted = parent::convert( $toModel, $lossy ); + + if ( $converted !== false ) { + return $converted; + } + + $toHandler = ContentHandler::getForModelID( $toModel ); + + if ( $toHandler instanceof TextContentHandler ) { + // NOTE: ignore content serialization format - it's just text anyway. 
+ $text = $this->getNativeData(); + $converted = $toHandler->unserializeContent( $text ); + } + + return $converted; + } + +} diff --git a/www/wiki/includes/content/TextContentHandler.php b/www/wiki/includes/content/TextContentHandler.php new file mode 100644 index 00000000..4a7944ca --- /dev/null +++ b/www/wiki/includes/content/TextContentHandler.php @@ -0,0 +1,162 @@ +<?php +/** + * Base content handler class for flat text contents. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + */ + +/** + * Base content handler implementation for flat text contents. + * + * @ingroup Content + */ +class TextContentHandler extends ContentHandler { + + public function __construct( $modelId = CONTENT_MODEL_TEXT, $formats = [ CONTENT_FORMAT_TEXT ] ) { + parent::__construct( $modelId, $formats ); + } + + /** + * Returns the content's text as-is. + * + * @param Content $content + * @param string $format The serialization format to check + * + * @return mixed + */ + public function serializeContent( Content $content, $format = null ) { + $this->checkFormat( $format ); + + return $content->getNativeData(); + } + + /** + * Attempts to merge differences between three versions. 
Returns a new + * Content object for a clean merge and false for failure or a conflict. + * + * All three Content objects passed as parameters must have the same + * content model. + * + * This text-based implementation uses wfMerge(). + * + * @param Content $oldContent The page's previous content. + * @param Content $myContent One of the page's conflicting contents. + * @param Content $yourContent One of the page's conflicting contents. + * + * @return Content|bool + */ + public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) { + $this->checkModelID( $oldContent->getModel() ); + $this->checkModelID( $myContent->getModel() ); + $this->checkModelID( $yourContent->getModel() ); + + $format = $this->getDefaultFormat(); + + $old = $this->serializeContent( $oldContent, $format ); + $mine = $this->serializeContent( $myContent, $format ); + $yours = $this->serializeContent( $yourContent, $format ); + + $ok = wfMerge( $old, $mine, $yours, $result ); + + if ( !$ok ) { + return false; + } + + if ( !$result ) { + return $this->makeEmptyContent(); + } + + $mergedContent = $this->unserializeContent( $result, $format ); + + return $mergedContent; + } + + /** + * Returns the name of the associated Content class, to + * be used when creating new objects. Override expected + * by subclasses. + * + * @since 1.24 + * + * @return string + */ + protected function getContentClass() { + return TextContent::class; + } + + /** + * Unserializes a Content object of the type supported by this ContentHandler. + * + * @since 1.21 + * + * @param string $text Serialized form of the content + * @param string $format The format used for serialization + * + * @return Content The TextContent object wrapping $text + */ + public function unserializeContent( $text, $format = null ) { + $this->checkFormat( $format ); + + $class = $this->getContentClass(); + return new $class( $text ); + } + + /** + * Creates an empty TextContent object. 
+ * + * @since 1.21 + * + * @return Content A new TextContent object with empty text. + */ + public function makeEmptyContent() { + $class = $this->getContentClass(); + return new $class( '' ); + } + + /** + * @see ContentHandler::supportsDirectEditing + * + * @return bool Default is true for TextContent and derivatives. + */ + public function supportsDirectEditing() { + return true; + } + + public function getFieldsForSearchIndex( SearchEngine $engine ) { + $fields = parent::getFieldsForSearchIndex( $engine ); + $fields['language'] = + $engine->makeSearchFieldMapping( 'language', SearchIndexField::INDEX_TYPE_KEYWORD ); + + return $fields; + } + + public function getDataForSearchIndex( + WikiPage $page, + ParserOutput $output, + SearchEngine $engine + ) { + $fields = parent::getDataForSearchIndex( $page, $output, $engine ); + $fields['language'] = + $this->getPageLanguage( $page->getTitle(), $page->getContent() )->getCode(); + return $fields; + } + +} diff --git a/www/wiki/includes/content/WikiTextStructure.php b/www/wiki/includes/content/WikiTextStructure.php new file mode 100644 index 00000000..1128d7bd --- /dev/null +++ b/www/wiki/includes/content/WikiTextStructure.php @@ -0,0 +1,254 @@ +<?php + +use HtmlFormatter\HtmlFormatter; + +/** + * Class allowing to explore structure of parsed wikitext. + */ +class WikiTextStructure { + /** + * @var string + */ + private $openingText; + /** + * @var string + */ + private $allText; + /** + * @var string[] + */ + private $auxText = []; + /** + * @var ParserOutput + */ + private $parserOutput; + + /** + * @var string[] selectors to elements that are excluded entirely from search + */ + private $excludedElementSelectors = [ + // "it looks like you don't have javascript enabled..." 
– do not need to index + 'audio', 'video', + // CSS stylesheets aren't content + 'style', + // The [1] for references + 'sup.reference', + // The ↑ next to references in the references section + '.mw-cite-backlink', + // Headings are already indexed in their own field. + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + // Collapsed fields are hidden by default so we don't want them showing up. + '.autocollapse', + // Content explicitly decided to be not searchable by editors such + // as custom navigation templates. + '.navigation-not-searchable' + ]; + + /** + * @var string[] selectors to elements that are considered auxiliary to article text for search + */ + private $auxiliaryElementSelectors = [ + // Thumbnail captions aren't really part of the text proper + '.thumbcaption', + // Neither are tables + 'table', + // Common style for "See also:". + '.rellink', + // Common style for calling out helpful links at the top of the article. + '.dablink', + // New class users can use to mark stuff as auxiliary to searches. + '.searchaux', + ]; + + /** + * @param ParserOutput $parserOutput + */ + public function __construct( ParserOutput $parserOutput ) { + $this->parserOutput = $parserOutput; + } + + /** + * Get headings on the page. + * @return string[] + * First strip out things that look like references. We can't use HTML filtering because + * the references come back as <sup> tags without a class. To keep from breaking stuff like + * ==Applicability of the strict mass–energy equivalence formula, ''E'' = ''mc''<sup>2</sup>== + * we don't remove the whole <sup> tag. We also don't want to strip the <sup> tag and remove + * everything that looks like [2] because, I dunno, maybe there is a band named Word [2] Foo + * or something. Whatever. So we only strip things that look like <sup> tags wrapping a + * reference. And since the data looks like: + * Reference in heading <sup>[1]</sup><sup>[2]</sup> + * we can not really use HtmlFormatter as we have no suitable selector. 
+ */ + public function headings() { + $headings = []; + $ignoredHeadings = $this->getIgnoredHeadings(); + foreach ( $this->parserOutput->getSections() as $heading ) { + $heading = $heading[ 'line' ]; + + // Some wikis wrap the brackets in a span: + // https://en.wikipedia.org/wiki/MediaWiki:Cite_reference_link + $heading = preg_replace( '/<\/?span>/', '', $heading ); + // Normalize the numeric entities &#91; and &#93; to literal [] so the + // following regexp would work. The patterns must spell out the entities: + // a bare '/[/' is an unterminated character class and makes preg_replace() fail. + $heading = preg_replace( [ '/&#91;/', '/&#93;/' ], [ '[', ']' ], $heading ); + // Drop only <sup> tags that wrap a bracketed number, i.e. reference markers. + $heading = preg_replace( '/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/is', '', $heading ); + + // Strip tags from the heading or else we'll display them (escaped) in search results + $heading = trim( Sanitizer::stripAllTags( $heading ) ); + + // Note that we don't take the level of the heading into account - all headings are equal. + // Except the ones we ignore. + if ( !in_array( $heading, $ignoredHeadings ) ) { + $headings[] = $heading; + } + } + return $headings; + } + + /** + * Parse a message content into an array. This function is generally used to + * parse settings stored as i18n messages (see search-ignored-headings). + * + * @param string $message + * @return string[] + */ + public static function parseSettingsInMessage( $message ) { + $lines = explode( "\n", $message ); + $lines = preg_replace( '/#.*$/', '', $lines ); // Remove comments + $lines = array_map( 'trim', $lines ); // Remove extra spaces + $lines = array_filter( $lines ); // Remove empty lines + return $lines; + } + + /** + * Get list of heading to ignore. 
+ * @return string[] + */ + private function getIgnoredHeadings() { + static $ignoredHeadings = null; + if ( $ignoredHeadings === null ) { + $ignoredHeadings = []; + $source = wfMessage( 'search-ignored-headings' )->inContentLanguage(); + if ( $source->isBlank() ) { + // Try old version too, just in case + $source = wfMessage( 'cirrussearch-ignored-headings' )->inContentLanguage(); + } + if ( !$source->isDisabled() ) { + $lines = self::parseSettingsInMessage( $source->plain() ); + $ignoredHeadings = $lines; // Now we just have headings! + } + } + return $ignoredHeadings; + } + + /** + * Extract parts of the text - opening, main and auxiliary. + */ + private function extractWikitextParts() { + if ( !is_null( $this->allText ) ) { + return; + } + $text = $this->parserOutput->getText( [ + 'enableSectionEditTokens' => false, + 'allowTOC' => false, + ] ); + if ( strlen( $text ) == 0 ) { + $this->allText = ""; + // empty text - nothing to seek here + return; + } + $opening = null; + + $this->openingText = $this->extractHeadingBeforeFirstHeading( $text ); + + // Add extra spacing around break tags so text crammed together like<br>this + // doesn't make one word. + $text = str_replace( '<br', "\n<br", $text ); + + $formatter = new HtmlFormatter( $text ); + + // Strip elements from the page that we never want in the search text. + $formatter->remove( $this->excludedElementSelectors ); + $formatter->filterContent(); + + // Strip elements from the page that are auxiliary text. These will still be + // searched but matches will be ranked lower and non-auxiliary matches will be + // preferred in highlighting. 
+ $formatter->remove( $this->auxiliaryElementSelectors ); + $auxiliaryElements = $formatter->filterContent(); + $this->allText = trim( Sanitizer::stripAllTags( $formatter->getText() ) ); + foreach ( $auxiliaryElements as $auxiliaryElement ) { + $this->auxText[] = + trim( Sanitizer::stripAllTags( $formatter->getText( $auxiliaryElement ) ) ); + } + } + + /** + * Get text before first heading. + * @param string $text + * @return string|null + */ + private function extractHeadingBeforeFirstHeading( $text ) { + $matches = []; + if ( !preg_match( '/<h[123456]>/', $text, $matches, PREG_OFFSET_CAPTURE ) ) { + // There isn't a first heading so we interpret this as the article + // being entirely without heading. + return null; + } + $text = substr( $text, 0, $matches[ 0 ][ 1 ] ); + if ( !$text ) { + // There isn't any text before the first heading so we declare there isn't + // a first heading. + return null; + } + + $formatter = new HtmlFormatter( $text ); + $formatter->remove( $this->excludedElementSelectors ); + $formatter->remove( $this->auxiliaryElementSelectors ); + $formatter->filterContent(); + $text = trim( Sanitizer::stripAllTags( $formatter->getText() ) ); + + if ( !$text ) { + // There isn't any text after filtering before the first heading so we declare + // that there isn't a first heading. 
+ return null; + } + + return $text; + } + + /** + * Get opening text + * @return string + */ + public function getOpeningText() { + $this->extractWikitextParts(); + return $this->openingText; + } + + /** + * Get main text + * @return string + */ + public function getMainText() { + $this->extractWikitextParts(); + return $this->allText; + } + + /** + * Get auxiliary text + * @return string[] + */ + public function getAuxiliaryText() { + $this->extractWikitextParts(); + return $this->auxText; + } + + /** + * Get the defaultsort property + * @return string|null + */ + public function getDefaultSort() { + return $this->parserOutput->getProperty( 'defaultsort' ); + } +} diff --git a/www/wiki/includes/content/WikitextContent.php b/www/wiki/includes/content/WikitextContent.php new file mode 100644 index 00000000..5beef31b --- /dev/null +++ b/www/wiki/includes/content/WikitextContent.php @@ -0,0 +1,363 @@ +<?php +/** + * Content object for wiki text pages. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @since 1.21 + * + * @file + * @ingroup Content + * + * @author Daniel Kinzler + */ + +/** + * Content object for wiki text pages. 
+ * + * @ingroup Content + */ +class WikitextContent extends TextContent { + private $redirectTargetAndText = null; + + public function __construct( $text ) { + parent::__construct( $text, CONTENT_MODEL_WIKITEXT ); + } + + /** + * @param string|int $sectionId + * + * @return Content|bool|null + * + * @see Content::getSection() + */ + public function getSection( $sectionId ) { + global $wgParser; + + $text = $this->getNativeData(); + $sect = $wgParser->getSection( $text, $sectionId, false ); + + if ( $sect === false ) { + return false; + } else { + return new static( $sect ); + } + } + + /** + * @param string|int|null|bool $sectionId + * @param Content $with + * @param string $sectionTitle + * + * @throws MWException + * @return Content + * + * @see Content::replaceSection() + */ + public function replaceSection( $sectionId, Content $with, $sectionTitle = '' ) { + $myModelId = $this->getModel(); + $sectionModelId = $with->getModel(); + + if ( $sectionModelId != $myModelId ) { + throw new MWException( "Incompatible content model for section: " . + "document uses $myModelId but " . + "section uses $sectionModelId." ); + } + + $oldtext = $this->getNativeData(); + $text = $with->getNativeData(); + + if ( strval( $sectionId ) === '' ) { + return $with; # XXX: copy first? + } + + if ( $sectionId === 'new' ) { + # Inserting a new section + $subject = $sectionTitle ? wfMessage( 'newsectionheaderdefaultlevel' ) + ->plaintextParams( $sectionTitle )->inContentLanguage()->text() . "\n\n" : ''; + if ( Hooks::run( 'PlaceNewSection', [ $this, $oldtext, $subject, &$text ] ) ) { + $text = strlen( trim( $oldtext ) ) > 0 + ? 
"{$oldtext}\n\n{$subject}{$text}" + : "{$subject}{$text}"; + } + } else { + # Replacing an existing section; roll out the big guns + global $wgParser; + + $text = $wgParser->replaceSection( $oldtext, $sectionId, $text ); + } + + $newContent = new static( $text ); + + return $newContent; + } + + /** + * Returns a new WikitextContent object with the given section heading + * prepended. + * + * @param string $header + * + * @return Content + */ + public function addSectionHeader( $header ) { + $text = wfMessage( 'newsectionheaderdefaultlevel' ) + ->rawParams( $header )->inContentLanguage()->text(); + $text .= "\n\n"; + $text .= $this->getNativeData(); + + return new static( $text ); + } + + /** + * Returns a Content object with pre-save transformations applied using + * Parser::preSaveTransform(). + * + * @param Title $title + * @param User $user + * @param ParserOptions $popts + * + * @return Content + */ + public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) { + global $wgParser; + + $text = $this->getNativeData(); + $pst = $wgParser->preSaveTransform( $text, $title, $user, $popts ); + + return ( $text === $pst ) ? $this : new static( $pst ); + } + + /** + * Returns a Content object with preload transformations applied (or this + * object if no transformations apply). + * + * @param Title $title + * @param ParserOptions $popts + * @param array $params + * + * @return Content + */ + public function preloadTransform( Title $title, ParserOptions $popts, $params = [] ) { + global $wgParser; + + $text = $this->getNativeData(); + $plt = $wgParser->getPreloadText( $text, $title, $popts, $params ); + + return new static( $plt ); + } + + /** + * Extract the redirect target and the remaining text on the page. + * + * @note migrated here from Title::newFromRedirectInternal() + * + * @since 1.23 + * + * @return array List of two elements: Title|null and string. 
+ */ + protected function getRedirectTargetAndText() { + global $wgMaxRedirects; + + if ( $this->redirectTargetAndText !== null ) { + return $this->redirectTargetAndText; + } + + if ( $wgMaxRedirects < 1 ) { + // redirects are disabled, so quit early + $this->redirectTargetAndText = [ null, $this->getNativeData() ]; + return $this->redirectTargetAndText; + } + + $redir = MagicWord::get( 'redirect' ); + $text = ltrim( $this->getNativeData() ); + if ( $redir->matchStartAndRemove( $text ) ) { + // Extract the first link and see if it's usable + // Ensure that it really does come directly after #REDIRECT + // Some older redirects included a colon, so don't freak about that! + $m = []; + if ( preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $text, $m ) ) { + // Strip preceding colon used to "escape" categories, etc. + // and URL-decode links + if ( strpos( $m[1], '%' ) !== false ) { + // Match behavior of inline link parsing here; + $m[1] = rawurldecode( ltrim( $m[1], ':' ) ); + } + $title = Title::newFromText( $m[1] ); + // If the title is a redirect to bad special pages or is invalid, return null + if ( !$title instanceof Title || !$title->isValidRedirectTarget() ) { + $this->redirectTargetAndText = [ null, $this->getNativeData() ]; + return $this->redirectTargetAndText; + } + + $this->redirectTargetAndText = [ $title, substr( $text, strlen( $m[0] ) ) ]; + return $this->redirectTargetAndText; + } + } + + $this->redirectTargetAndText = [ null, $this->getNativeData() ]; + return $this->redirectTargetAndText; + } + + /** + * Implement redirect extraction for wikitext. + * + * @return Title|null + * + * @see Content::getRedirectTarget + */ + public function getRedirectTarget() { + list( $title, ) = $this->getRedirectTargetAndText(); + + return $title; + } + + /** + * This implementation replaces the first link on the page with the given new target + * if this Content object is a redirect. Otherwise, this method returns $this. 
+	 *
+	 * The new link text is inserted literally: "$" and "\" sequences in the target's
+	 * full text are not interpreted as regex back-references.
+	 *
+	 * @since 1.21
+	 *
+	 * @param Title $target
+	 *
+	 * @return Content
+	 *
+	 * @see Content::updateRedirect()
+	 */
+	public function updateRedirect( Title $target ) {
+		if ( !$this->isRedirect() ) {
+			return $this;
+		}
+
+		$newLink = '[[' . $target->getFullText() . ']]';
+
+		# Fix the text
+		# Remember that redirect pages can have categories, templates, etc.,
+		# so the regex has to be fairly general
+		# A callback supplies the replacement because preg_replace() would expand
+		# "$1", "${1}" or "\1" style sequences that happen to occur in the title.
+		$newText = preg_replace_callback(
+			'/ \[ \[ [^\]]* \] \] /x',
+			static function () use ( $newLink ) {
+				return $newLink;
+			},
+			$this->getNativeData(),
+			1
+		);
+
+		return new static( $newText );
+	}
+
+	/**
+	 * Returns true if this content is not a redirect, and this content's text
+	 * is countable according to the criteria defined by $wgArticleCountMethod.
+	 *
+	 * Only the 'link' method looks at the content: it counts the page when
+	 * $hasLinks is truthy, or, if $hasLinks is null, when the parser output
+	 * reports a non-empty link list. With any other setting every non-redirect
+	 * is countable.
+	 *
+	 * @param bool|null $hasLinks If it is known whether this content contains
+	 *    links, provide this information here, to avoid redundant parsing to
+	 *    find out (default: null).
+	 * @param Title|null $title Optional title, defaults to the title from the current main request.
+	 *
+	 * @return bool
+	 */
+	public function isCountable( $hasLinks = null, Title $title = null ) {
+		global $wgArticleCountMethod;
+
+		if ( $this->isRedirect() ) {
+			return false;
+		}
+
+		if ( $wgArticleCountMethod !== 'link' ) {
+			return true;
+		}
+
+		if ( $hasLinks === null ) { # not known, find out
+			if ( !$title ) {
+				$context = RequestContext::getMain();
+				$title = $context->getTitle();
+			}
+
+			// Parse without generating HTML; only the link table is needed here.
+			$po = $this->getParserOutput( $title, null, null, false );
+			$links = $po->getLinks();
+			$hasLinks = !empty( $links );
+		}
+
+		return $hasLinks;
+	}
+
+	/**
+	 * Returns the parent's summary text with a trailing unfinished link unwrapped:
+	 * a "[[" that is not closed before the end of the truncated text is removed
+	 * and the text after it is kept.
+	 *
+	 * @param int $maxlength
+	 * @return string
+	 */
+	public function getTextForSummary( $maxlength = 250 ) {
+		$truncatedtext = parent::getTextForSummary( $maxlength );
+
+		# clean up unfinished links
+		# XXX: make this optional? wasn't there in autosummary, but required for
+		# deletion summary.
+		$truncatedtext = preg_replace( '/\[\[([^\]]*)\]?$/', '$1', $truncatedtext );
+
+		return $truncatedtext;
+	}
+
+	/**
+	 * Fills $output with the result of parsing the content's text using $wgParser.
+	 *
+	 * The text that is parsed is the second element of getRedirectTargetAndText().
+	 * When a redirect target was found it is added to the output's links and, if
+	 * $generateHtml is set, the redirect header HTML is put in front of the parsed
+	 * text and the 'mediawiki.action.view.redirectPage' style module is added.
+	 *
+	 * @param Title $title
+	 * @param int $revId Revision to pass to the parser (default: null)
+	 * @param ParserOptions $options (default: null)
+	 * @param bool $generateHtml (default: true)
+	 * @param ParserOutput &$output ParserOutput representing the HTML form of the text,
+	 *   may be manipulated or replaced.
+	 */
+	protected function fillParserOutput( Title $title, $revId,
+		ParserOptions $options, $generateHtml, ParserOutput &$output
+	) {
+		global $wgParser;
+
+		list( $redir, $text ) = $this->getRedirectTargetAndText();
+		// The incoming $output is replaced by the parser's own result object.
+		$output = $wgParser->parse( $text, $title, $options, true, true, $revId );
+
+		// Add redirect indicator at the top
+		if ( $redir ) {
+			// Make sure to include the redirect link in pagelinks
+			$output->addLink( $redir );
+			if ( $generateHtml ) {
+				$chain = $this->getRedirectChain();
+				$output->setText(
+					Article::getRedirectHeaderHtml( $title->getPageLanguage(), $chain, false ) .
+					$output->getRawText()
+				);
+				$output->addModuleStyles( 'mediawiki.action.view.redirectPage' );
+			}
+		}
+	}
+
+	/**
+	 * Not available for wikitext; HTML has to be obtained through the parser output.
+	 *
+	 * @throws MWException Always.
+	 */
+	protected function getHtml() {
+		throw new MWException(
+			"getHtml() not implemented for wikitext. "
+				. "Use getParserOutput()->getText()."
+		);
+	}
+
+	/**
+	 * This implementation calls $word->match() on this TextContent object's text.
+	 *
+	 * @param MagicWord $word
+	 *
+	 * @return bool
+	 *
+	 * @see Content::matchMagicWord()
+	 */
+	public function matchMagicWord( MagicWord $word ) {
+		$nativeText = $this->getNativeData();
+
+		return $word->match( $nativeText );
+	}
+
+}
diff --git a/www/wiki/includes/content/WikitextContentHandler.php b/www/wiki/includes/content/WikitextContentHandler.php
new file mode 100644
index 00000000..9c26ae15
--- /dev/null
+++ b/www/wiki/includes/content/WikitextContentHandler.php
@@ -0,0 +1,163 @@
+<?php
+/**
+ * Content handler for wiki text pages.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 1.21
+ *
+ * @file
+ * @ingroup Content
+ */
+
+/**
+ * Content handler for wiki text pages.
+ *
+ * @ingroup Content
+ */
+class WikitextContentHandler extends TextContentHandler {
+
+	/**
+	 * Registers CONTENT_FORMAT_WIKITEXT as the only format handed to the parent handler.
+	 *
+	 * @param string $modelId Content model ID; defaults to CONTENT_MODEL_WIKITEXT
+	 */
+	public function __construct( $modelId = CONTENT_MODEL_WIKITEXT ) {
+		$supportedFormats = [ CONTENT_FORMAT_WIKITEXT ];
+
+		parent::__construct( $modelId, $supportedFormats );
+	}
+
+	/**
+	 * @return string Class name of the Content objects this handler creates (WikitextContent)
+	 */
+	protected function getContentClass() {
+		return WikitextContent::class;
+	}
+
+	/**
+	 * Returns a WikitextContent object representing a redirect to the given destination page.
+	 *
+	 * @param Title $destination The page to redirect to.
+	 * @param string $text Text to include in the redirect, if possible.
+	 *
+	 * @return Content
+	 *
+	 * @see ContentHandler::makeRedirectContent
+	 */
+	public function makeRedirectContent( Title $destination, $text = '' ) {
+		// Category targets, and interwiki targets whose prefix
+		// Language::fetchLanguageName() recognises, get a colon in front of the link target.
+		$needsColon = $destination->getNamespace() == NS_CATEGORY;
+		if ( !$needsColon ) {
+			$interwiki = $destination->getInterwiki();
+			$needsColon = $interwiki && Language::fetchLanguageName( $interwiki, null, 'mw' );
+		}
+
+		// First synonym of the redirect magic word, followed by the link itself.
+		$redirectWord = MagicWord::get( 'redirect' );
+		$keyword = $redirectWord->getSynonym( 0 );
+		$redirectLine = $keyword . ' [[' . ( $needsColon ? ':' : '' ) . $destination->getFullText() . ']]';
+
+		// Extra text, when given, goes on the line after the redirect.
+		$wikitext = $text != '' ? $redirectLine . "\n" . $text : $redirectLine;
+
+		$contentClass = $this->getContentClass();
+		return new $contentClass( $wikitext );
+	}
+
+	/**
+	 * Wikitext supports redirects.
+	 *
+	 * @return bool Always true.
+	 *
+	 * @see ContentHandler::supportsRedirects
+	 */
+	public function supportsRedirects() {
+		return true;
+	}
+
+	/**
+	 * Wikitext supports sections.
+	 *
+	 * @return bool Always true.
+	 *
+	 * @see ContentHandler::supportsSections
+	 */
+	public function supportsSections() {
+		return true;
+	}
+
+	/**
+	 * Returns true, because wikitext supports caching using the
+	 * ParserCache mechanism.
+	 *
+	 * @since 1.21
+	 *
+	 * @return bool Always true.
+	 *
+	 * @see ContentHandler::isParserCacheSupported
+	 */
+	public function isParserCacheSupported() {
+		return true;
+	}
+
+	/**
+	 * Creates the handler whose search fields and data are merged into this one's.
+	 *
+	 * @return FileContentHandler A new instance on every call
+	 */
+	protected function getFileHandler() {
+		return new FileContentHandler();
+	}
+
+	/**
+	 * Adds the 'heading', 'auxiliary_text' and 'opening_text' text-field mappings to
+	 * the parent's fields, then merges in the fields of the file handler.
+	 *
+	 * @param SearchEngine $engine Used to create the field mappings
+	 * @return array Field mappings keyed by field name
+	 */
+	public function getFieldsForSearchIndex( SearchEngine $engine ) {
+		$fields = parent::getFieldsForSearchIndex( $engine );
+
+		$textType = SearchIndexField::INDEX_TYPE_TEXT;
+
+		$heading = $engine->makeSearchFieldMapping( 'heading', $textType );
+		$heading->setFlag( SearchIndexField::FLAG_SCORING );
+		$fields['heading'] = $heading;
+
+		$fields['auxiliary_text'] = $engine->makeSearchFieldMapping( 'auxiliary_text', $textType );
+
+		$openingText = $engine->makeSearchFieldMapping( 'opening_text', $textType );
+		$openingText->setFlag(
+			SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
+		);
+		$fields['opening_text'] = $openingText;
+
+		// Until we have full first-class content handler for files, we invoke it explicitly here
+		$fileFields = $this->getFileHandler()->getFieldsForSearchIndex( $engine );
+
+		return array_merge( $fields, $fileFields );
+	}
+
+	/**
+	 * Extends the parent's search data with values read from a WikiTextStructure built
+	 * on the parser output; pages in NS_FILE additionally get the file handler's data.
+	 *
+	 * @param WikiPage $page
+	 * @param ParserOutput $parserOutput
+	 * @param SearchEngine $engine
+	 * @return array Search data keyed by field name
+	 */
+	public function getDataForSearchIndex(
+		WikiPage $page,
+		ParserOutput $parserOutput,
+		SearchEngine $engine
+	) {
+		$data = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
+
+		$structure = new WikiTextStructure( $parserOutput );
+		$data['heading'] = $structure->headings();
+		// text fields
+		$data['opening_text'] = $structure->getOpeningText();
+		$data['text'] = $structure->getMainText(); // overwrites one from ContentHandler
+		$data['auxiliary_text'] = $structure->getAuxiliaryText();
+		$data['defaultsort'] = $structure->getDefaultSort();
+
+		if ( $page->getTitle()->getNamespace() != NS_FILE ) {
+			return $data;
+		}
+
+		// Until we have full first-class content handler for files, we invoke it explicitly here
+		$fileData = $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine );
+
+		return array_merge( $data, $fileData );
+	}
+
+} |