summary refs log tree commit diff
path: root/www/wiki/includes/specials/SpecialExport.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/includes/specials/SpecialExport.php')
-rw-r--r-- www/wiki/includes/specials/SpecialExport.php 593
1 files changed, 593 insertions, 0 deletions
diff --git a/www/wiki/includes/specials/SpecialExport.php b/www/wiki/includes/specials/SpecialExport.php
new file mode 100644
index 00000000..5a98bb90
--- /dev/null
+++ b/www/wiki/includes/specials/SpecialExport.php
@@ -0,0 +1,593 @@
+<?php
+/**
+ * Implements Special:Export
+ *
+ * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup SpecialPage
+ */
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * A special page that allows users to export pages to an XML file
+ *
+ * @ingroup SpecialPage
+ */
+class SpecialExport extends SpecialPage {
+ private $curonly, $doExport, $pageLinkDepth, $templates;
+
+ /**
+ * Construct the special page, passing the page name 'Export' to the
+ * parent constructor.
+ */
+ public function __construct() {
+ parent::__construct( 'Export' );
+ }
+
+ /**
+  * Entry point for Special:Export.
+  *
+  * Depending on the request this either streams an XML export (normal
+  * page output is disabled in that case) or renders the export form,
+  * optionally pre-filled with pages gathered from a category or a
+  * namespace.
+  *
+  * @param string|null $par Subpage text; used as the default value of the
+  *  'pages' parameter for requests handled by the GET branch
+  */
+ public function execute( $par ) {
+     $this->setHeaders();
+     $this->outputHeader();
+     $config = $this->getConfig();
+
+     // Set some variables
+     $this->curonly = true;
+     $this->doExport = false;
+     $request = $this->getRequest();
+     $this->templates = $request->getCheck( 'templates' );
+     $this->pageLinkDepth = $this->validateLinkDepth(
+         $request->getIntOrNull( 'pagelink-depth' )
+     );
+     $nsindex = '';
+     $exportall = false;
+
+     if ( $request->getCheck( 'addcat' ) ) {
+         // "Add pages from category" button: extend the page list and
+         // fall through to re-display the form (doExport stays false).
+         $page = $request->getText( 'pages' );
+         $catname = $request->getText( 'catname' );
+
+         if ( $catname !== '' && $catname !== null && $catname !== false ) {
+             $t = Title::makeTitleSafe( NS_MAIN, $catname );
+             if ( $t ) {
+                 /**
+                  * @todo FIXME: This can lead to hitting memory limit for very large
+                  * categories. Ideally we would do the lookup synchronously
+                  * during the export in a single query.
+                  */
+                 $catpages = $this->getPagesFromCategory( $t );
+                 if ( $catpages ) {
+                     if ( $page !== '' ) {
+                         $page .= "\n";
+                     }
+                     $page .= implode( "\n", $catpages );
+                 }
+             }
+         }
+     } elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
+         // "Add pages from namespace" button: same idea as the category branch.
+         $page = $request->getText( 'pages' );
+         $nsindex = $request->getText( 'nsindex', '' );
+
+         if ( strval( $nsindex ) !== '' ) {
+             /**
+              * Same implementation as above, so same @todo
+              */
+             $nspages = $this->getPagesFromNamespace( $nsindex );
+             if ( $nspages ) {
+                 // Only insert a separator when the list already has
+                 // content, so an empty list does not start with a
+                 // blank line (mirrors the category branch).
+                 if ( $page !== '' ) {
+                     $page .= "\n";
+                 }
+                 $page .= implode( "\n", $nspages );
+             }
+         }
+     } elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
+         $this->doExport = true;
+         $exportall = true;
+
+         /* Although $page and $history are not used later on, we
+         nevertheless set them to avoid that PHP notices about using
+         undefined variables foul up our XML output (see call to
+         doExport(...) further down) */
+         $page = '';
+         $history = '';
+     } elseif ( $request->wasPosted() && $par == '' ) {
+         $page = $request->getText( 'pages' );
+         $this->curonly = $request->getCheck( 'curonly' );
+         $rawOffset = $request->getVal( 'offset' );
+
+         if ( $rawOffset ) {
+             $offset = wfTimestamp( TS_MW, $rawOffset );
+         } else {
+             $offset = null;
+         }
+
+         $maxHistory = $config->get( 'ExportMaxHistory' );
+         $limit = $request->getInt( 'limit' );
+         $dir = $request->getVal( 'dir' );
+         // Default history range; refined below from limit/offset/dir.
+         $history = [
+             'dir' => 'asc',
+             'offset' => false,
+             'limit' => $maxHistory,
+         ];
+         $historyCheck = $request->getCheck( 'history' );
+
+         if ( $this->curonly ) {
+             $history = WikiExporter::CURRENT;
+         } elseif ( !$historyCheck ) {
+             // Neither 'curonly' nor 'history' was checked: apply the
+             // caller-supplied range parameters on top of the defaults.
+             if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
+                 $history['limit'] = $limit;
+             }
+
+             if ( !is_null( $offset ) ) {
+                 $history['offset'] = $offset;
+             }
+
+             // $dir is null when the parameter is absent; cast it so that
+             // no null is handed to strtolower() (deprecated in PHP 8.1+).
+             if ( strtolower( (string)$dir ) === 'desc' ) {
+                 $history['dir'] = 'desc';
+             }
+         }
+
+         if ( $page != '' ) {
+             $this->doExport = true;
+         }
+     } else {
+         // Default to current-only for GET requests.
+         $page = $request->getText( 'pages', $par );
+         $historyCheck = $request->getCheck( 'history' );
+
+         if ( $historyCheck ) {
+             $history = WikiExporter::FULL;
+         } else {
+             $history = WikiExporter::CURRENT;
+         }
+
+         if ( $page != '' ) {
+             $this->doExport = true;
+         }
+     }
+
+     if ( !$config->get( 'ExportAllowHistory' ) ) {
+         // Override
+         $history = WikiExporter::CURRENT;
+     }
+
+     // The author list is only produced for current-only exports and only
+     // when the configuration allows it.
+     $list_authors = $request->getCheck( 'listauthors' );
+     if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
+         $list_authors = false;
+     }
+
+     if ( $this->doExport ) {
+         $this->getOutput()->disable();
+
+         // Cancel output buffering and gzipping if set
+         // This should provide safer streaming for pages with history
+         wfResetOutputBuffers();
+         $request->response()->header( "Content-type: application/xml; charset=utf-8" );
+         $request->response()->header( "X-Robots-Tag: noindex,nofollow" );
+
+         if ( $request->getCheck( 'wpDownload' ) ) {
+             // Provide a sane filename suggestion
+             $filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
+             $request->response()->header( "Content-disposition: attachment;filename={$filename}" );
+         }
+
+         $this->doExport( $page, $history, $list_authors, $exportall );
+
+         return;
+     }
+
+     // No export requested (or nothing to export): show the form.
+     $out = $this->getOutput();
+     $out->addWikiMsg( 'exporttext' );
+
+     if ( $page == '' ) {
+         $categoryName = $request->getText( 'catname' );
+     } else {
+         $categoryName = '';
+     }
+
+     // Fields are appended with += in display order; do not reorder.
+     $formDescriptor = [
+         'catname' => [
+             'type' => 'textwithbutton',
+             'name' => 'catname',
+             'horizontal-label' => true,
+             'label-message' => 'export-addcattext',
+             'default' => $categoryName,
+             'size' => 40,
+             'buttontype' => 'submit',
+             'buttonname' => 'addcat',
+             'buttondefault' => $this->msg( 'export-addcat' )->text(),
+             'hide-if' => [ '===', 'exportall', '1' ],
+         ],
+     ];
+     if ( $config->get( 'ExportFromNamespaces' ) ) {
+         $formDescriptor += [
+             'nsindex' => [
+                 'type' => 'namespaceselectwithbutton',
+                 'default' => $nsindex,
+                 'label-message' => 'export-addnstext',
+                 'horizontal-label' => true,
+                 'name' => 'nsindex',
+                 'id' => 'namespace',
+                 'cssclass' => 'namespaceselector',
+                 'buttontype' => 'submit',
+                 'buttonname' => 'addns',
+                 'buttondefault' => $this->msg( 'export-addns' )->text(),
+                 'hide-if' => [ '===', 'exportall', '1' ],
+             ],
+         ];
+     }
+
+     if ( $config->get( 'ExportAllowAll' ) ) {
+         $formDescriptor += [
+             'exportall' => [
+                 'type' => 'check',
+                 'label-message' => 'exportall',
+                 'name' => 'exportall',
+                 'id' => 'exportall',
+                 'default' => $request->wasPosted() ? $request->getCheck( 'exportall' ) : false,
+             ],
+         ];
+     }
+
+     $formDescriptor += [
+         'textarea' => [
+             'class' => HTMLTextAreaField::class,
+             'name' => 'pages',
+             'label-message' => 'export-manual',
+             'nodata' => true,
+             'rows' => 10,
+             'default' => $page,
+             'hide-if' => [ '===', 'exportall', '1' ],
+         ],
+     ];
+
+     if ( $config->get( 'ExportAllowHistory' ) ) {
+         $formDescriptor += [
+             'curonly' => [
+                 'type' => 'check',
+                 'label-message' => 'exportcuronly',
+                 'name' => 'curonly',
+                 'id' => 'curonly',
+                 'default' => $request->wasPosted() ? $request->getCheck( 'curonly' ) : true,
+             ],
+         ];
+     } else {
+         $out->addWikiMsg( 'exportnohistory' );
+     }
+
+     $formDescriptor += [
+         'templates' => [
+             'type' => 'check',
+             'label-message' => 'export-templates',
+             'name' => 'templates',
+             'id' => 'wpExportTemplates',
+             'default' => $request->wasPosted() ? $request->getCheck( 'templates' ) : false,
+         ],
+     ];
+
+     if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
+         $formDescriptor += [
+             'pagelink-depth' => [
+                 'type' => 'text',
+                 'name' => 'pagelink-depth',
+                 'id' => 'pagelink-depth',
+                 'label-message' => 'export-pagelinks',
+                 'default' => '0',
+                 'size' => 20,
+             ],
+         ];
+     }
+
+     $formDescriptor += [
+         'wpDownload' => [
+             'type' => 'check',
+             'name' => 'wpDownload',
+             'id' => 'wpDownload',
+             'default' => $request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true,
+             'label-message' => 'export-download',
+         ],
+     ];
+
+     if ( $config->get( 'ExportAllowListContributors' ) ) {
+         $formDescriptor += [
+             'listauthors' => [
+                 'type' => 'check',
+                 'label-message' => 'exportlistauthors',
+                 'default' => $request->wasPosted() ? $request->getCheck( 'listauthors' ) : false,
+                 'name' => 'listauthors',
+                 'id' => 'listauthors',
+             ],
+         ];
+     }
+
+     $htmlForm = HTMLForm::factory( 'ooui', $formDescriptor, $this->getContext() );
+     $htmlForm->setSubmitTextMsg( 'export-submit' );
+     $htmlForm->prepareForm()->displayForm( false );
+     $this->addHelpLink( 'Help:Export' );
+ }
+
+ /**
+  * Tell whether the current user holds the 'override-export-depth' right.
+  *
+  * @return bool
+  */
+ private function userCanOverrideExportDepth() {
+     $user = $this->getUser();
+
+     return $user->isAllowed( 'override-export-depth' );
+ }
+
+ /**
+  * Do the actual page exporting
+  *
+  * @param string $page User input on what page(s) to export, one title
+  *  per line
+  * @param int|array $history One of the WikiExporter history export
+  *  constants, or the dir/offset/limit array built by execute()
+  * @param bool $list_authors Whether to add distinct author list (when
+  *  not returning full history)
+  * @param bool $exportall Whether to export everything
+  */
+ private function doExport( $page, $history, $list_authors, $exportall ) {
+     // Titles to export; stays empty (and unused) when exporting everything.
+     $pages = [];
+
+     // If we are grabbing everything, enable full history and ignore the rest
+     if ( $exportall ) {
+         $history = WikiExporter::FULL;
+     } else {
+         $pageSet = []; // Inverted index of all pages to look up
+
+         // Split up and normalize input
+         foreach ( explode( "\n", $page ) as $pageName ) {
+             $pageName = trim( $pageName );
+             $title = Title::newFromText( $pageName );
+             if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
+                 // Only record each page once!
+                 $pageSet[$title->getPrefixedText()] = true;
+             }
+         }
+
+         // Set of original pages to pass on to further manipulation...
+         $inputPages = array_keys( $pageSet );
+
+         // Look up any linked pages if asked...
+         if ( $this->templates ) {
+             $pageSet = $this->getTemplates( $inputPages, $pageSet );
+         }
+         $linkDepth = $this->pageLinkDepth;
+         if ( $linkDepth ) {
+             $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
+         }
+
+         $pages = array_keys( $pageSet );
+
+         // Normalize titles to the same format and remove dupes, see T19374
+         foreach ( $pages as $k => $v ) {
+             $pages[$k] = str_replace( " ", "_", $v );
+         }
+
+         $pages = array_unique( $pages );
+     }
+
+     /* Ok, let's get to it... */
+     if ( $history == WikiExporter::CURRENT ) {
+         $lb = false;
+         $db = wfGetDB( DB_REPLICA );
+         $buffer = WikiExporter::BUFFER;
+     } else {
+         // Use an unbuffered query; histories may be very long!
+         $lb = MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->newMainLB();
+         $db = $lb->getConnection( DB_REPLICA );
+         $buffer = WikiExporter::STREAM;
+
+         // This might take a while... :D
+         Wikimedia\suppressWarnings();
+         set_time_limit( 0 );
+         Wikimedia\restoreWarnings();
+     }
+
+     try {
+         $exporter = new WikiExporter( $db, $history, $buffer );
+         $exporter->list_authors = $list_authors;
+         $exporter->openStream();
+
+         if ( $exportall ) {
+             $exporter->allPages();
+         } else {
+             foreach ( $pages as $exportName ) {
+                 # T10824: Only export pages the user can read
+                 $title = Title::newFromText( $exportName );
+                 if ( is_null( $title ) ) {
+                     // @todo Perhaps output an <error> tag or something.
+                     continue;
+                 }
+
+                 if ( !$title->userCan( 'read', $this->getUser() ) ) {
+                     // @todo Perhaps output an <error> tag or something.
+                     continue;
+                 }
+
+                 $exporter->pageByTitle( $title );
+             }
+         }
+
+         $exporter->closeStream();
+     } finally {
+         // Close the dedicated load balancer's connections even when the
+         // export aborts with an exception.
+         if ( $lb ) {
+             $lb->closeAll();
+         }
+     }
+ }
+
+ /**
+  * List the pages that are members of a category, as prefixed title
+  * strings in DB-key form.
+  *
+  * The number of rows fetched is capped by the 'ExportPagelistLimit'
+  * configuration value.
+  *
+  * @param Title $title Title whose DB key is used as the category name
+  * @return array List of page names
+  */
+ private function getPagesFromCategory( $title ) {
+     global $wgContLang;
+
+     $limit = $this->getConfig()->get( 'ExportPagelistLimit' );
+     $categoryKey = $title->getDBkey();
+
+     $rows = wfGetDB( DB_REPLICA )->select(
+         [ 'page', 'categorylinks' ],
+         [ 'page_namespace', 'page_title' ],
+         [ 'cl_from=page_id', 'cl_to' => $categoryKey ],
+         __METHOD__,
+         [ 'LIMIT' => $limit ]
+     );
+
+     $names = [];
+
+     foreach ( $rows as $row ) {
+         // A falsy namespace value means no prefix is added.
+         $names[] = $row->page_namespace
+             ? $wgContLang->getNsText( $row->page_namespace ) . ':' . $row->page_title
+             : $row->page_title;
+     }
+
+     return $names;
+ }
+
+ /**
+  * List the pages in a namespace, as prefixed title strings in DB-key
+  * form.
+  *
+  * The number of rows fetched is capped by the 'ExportPagelistLimit'
+  * configuration value.
+  *
+  * @param int $nsindex Namespace index to match against page_namespace
+  * @return array List of page names
+  */
+ private function getPagesFromNamespace( $nsindex ) {
+     global $wgContLang;
+
+     $options = [ 'LIMIT' => $this->getConfig()->get( 'ExportPagelistLimit' ) ];
+     $conds = [ 'page_namespace' => $nsindex ];
+
+     $db = wfGetDB( DB_REPLICA );
+     $rows = $db->select( 'page', [ 'page_namespace', 'page_title' ], $conds, __METHOD__, $options );
+
+     $result = [];
+
+     foreach ( $rows as $row ) {
+         if ( !$row->page_namespace ) {
+             // Falsy namespace value: the bare title is used.
+             $result[] = $row->page_title;
+             continue;
+         }
+
+         $prefix = $wgContLang->getNsText( $row->page_namespace );
+         $result[] = $prefix . ':' . $row->page_title;
+     }
+
+     return $result;
+ }
+
+ /**
+  * Add the templates used on the given pages to the page set.
+  *
+  * Delegates to getLinks() with the 'templatelinks' table.
+  *
+  * @param array $inputPages List of titles to look up
+  * @param array $pageSet Associative array indexed by titles for output
+  * @return array Associative array indexed by titles
+  */
+ private function getTemplates( $inputPages, $pageSet ) {
+     $table = 'templatelinks';
+     $fields = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
+     $join = [ 'page_id=tl_from' ];
+
+     return $this->getLinks( $inputPages, $pageSet, $table, $fields, $join );
+ }
+
+ /**
+  * Validate link depth setting, if available.
+  *
+  * The result is always within 0..5: missing or negative input yields 0,
+  * users who cannot override the export depth are limited to the
+  * 'ExportMaxLinkDepth' configuration value, and every caller is subject
+  * to the hard-coded ceiling of 5.
+  *
+  * @param int|null $depth Requested depth; execute() passes the result of
+  *  getIntOrNull(), so null is possible
+  * @return int
+  */
+ private function validateLinkDepth( $depth ) {
+     if ( $depth === null || $depth < 0 ) {
+         return 0;
+     }
+
+     if ( !$this->userCanOverrideExportDepth() ) {
+         $maxLinkDepth = $this->getConfig()->get( 'ExportMaxLinkDepth' );
+         if ( $depth > $maxLinkDepth ) {
+             // Clamp instead of returning right away, so that the hard
+             // limit below also applies to a configured maximum above 5.
+             $depth = $maxLinkDepth;
+         }
+     }
+
+     /*
+      * There's a HARD CODED limit of 5 levels of recursion here to prevent a
+      * crazy-big export from being done by someone setting the depth
+      * number too high. In other words, last resort safety net.
+      */
+
+     // max() guards against a negative configured maximum.
+     return max( 0, intval( min( $depth, 5 ) ) );
+ }
+
+ /**
+  * Expand a list of pages to include pages linked to from that page.
+  *
+  * Links are followed for up to $depth rounds. Each round only looks up
+  * the pages that have not been expanded in an earlier round; pages
+  * expanded before cannot contribute anything new to the set, so
+  * re-querying them would only cost database round trips.
+  *
+  * @param array $inputPages Titles to start from
+  * @param array $pageSet Associative array indexed by titles
+  * @param int $depth Number of link levels to follow
+  * @return array Associative array indexed by titles
+  */
+ private function getPageLinks( $inputPages, $pageSet, $depth ) {
+     $expanded = []; // Names already handed to getLinks()
+     $frontier = $inputPages;
+
+     for ( ; $depth > 0; --$depth ) {
+         $pageSet = $this->getLinks(
+             $frontier, $pageSet, 'pagelinks',
+             [ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ],
+             [ 'page_id=pl_from' ]
+         );
+
+         foreach ( $frontier as $name ) {
+             $expanded[$name] = true;
+         }
+
+         // Next round: every member of the set that was not expanded yet,
+         // in the set's own order.
+         $frontier = [];
+         foreach ( array_keys( $pageSet ) as $name ) {
+             if ( !isset( $expanded[$name] ) ) {
+                 $frontier[] = $name;
+             }
+         }
+
+         if ( !$frontier ) {
+             // Nothing new was discovered; further rounds are no-ops.
+             break;
+         }
+     }
+
+     return $pageSet;
+ }
+
+ /**
+  * Expand a list of pages to include items used in those pages.
+  *
+  * Every input page that parses as a title is added to the set itself,
+  * followed by each row found for it in the given link table.
+  *
+  * @param array $inputPages Array of page titles
+  * @param array $pageSet Associative array indexed by titles
+  * @param string $table Link table to join against 'page'
+  * @param array $fields Field aliases; rows are read via their
+  *  'namespace' and 'title' aliases
+  * @param array $join Join condition(s) between 'page' and $table
+  * @return array The extended page set
+  */
+ private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
+     $dbr = wfGetDB( DB_REPLICA );
+
+     foreach ( $inputPages as $pageName ) {
+         $source = Title::newFromText( $pageName );
+         if ( !$source ) {
+             continue;
+         }
+
+         $pageSet[$source->getPrefixedText()] = true;
+
+         $conds = array_merge(
+             $join,
+             [
+                 'page_namespace' => $source->getNamespace(),
+                 'page_title' => $source->getDBkey()
+             ]
+         );
+
+         /// @todo FIXME: May or may not be more efficient to batch these
+         /// by namespace when given multiple input pages.
+         $rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );
+
+         foreach ( $rows as $row ) {
+             $target = Title::makeTitle( $row->namespace, $row->title );
+             $pageSet[$target->getPrefixedText()] = true;
+         }
+     }
+
+     return $pageSet;
+ }
+
+ /**
+ * Return the group name of this special page.
+ *
+ * @return string Always 'pagetools'
+ */
+ protected function getGroupName() {
+ return 'pagetools';
+ }
+}