summary | refs | log | tree | commit | diff
path: root/www/wiki/maintenance/cleanupUploadStash.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/maintenance/cleanupUploadStash.php')
-rw-r--r-- www/wiki/maintenance/cleanupUploadStash.php 156
1 files changed, 156 insertions, 0 deletions
diff --git a/www/wiki/maintenance/cleanupUploadStash.php b/www/wiki/maintenance/cleanupUploadStash.php
new file mode 100644
index 00000000..61cd9c24
--- /dev/null
+++ b/www/wiki/maintenance/cleanupUploadStash.php
@@ -0,0 +1,156 @@
+<?php
+/**
+ * Remove old or broken uploads from temporary uploaded file storage,
+ * clean up associated database records
+ *
+ * Copyright © 2011, Wikimedia Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Ian Baker <ibaker@wikimedia.org>
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to remove old or broken uploads from temporary uploaded
+ * file storage and clean up associated database records.
+ *
+ * A run has three phases, all driven by the same age cutoff
+ * (now minus $wgUploadStashMaxAge):
+ *  1. remove stash entries listed in the `uploadstash` table via UploadStash,
+ *  2. delete old files from the temp repo's 'thumb' zone,
+ *  3. delete old files from the temp repo's 'public' zone, whether or not
+ *     they are registered in the database.
+ *
+ * @ingroup Maintenance
+ */
+class UploadStashCleanup extends Maintenance {
+
+	public function __construct() {
+		parent::__construct();
+		$this->addDescription( 'Clean up abandoned files in temporary uploaded file stash' );
+		// Number of delete operations sent to the file backend per batch.
+		$this->setBatchSize( 50 );
+	}
+
+	/**
+	 * Entry point: purge stale stash entries, stale thumbnails and stale
+	 * unregistered temp files, in that order.
+	 */
+	public function execute() {
+		global $wgUploadStashMaxAge;
+
+		$repo = RepoGroup::singleton()->getLocalRepo();
+		$tempRepo = $repo->getTempRepo();
+
+		// how far back should this look for files to delete?
+		$cutoff = time() - $wgUploadStashMaxAge;
+
+		// Delete all registered stash files...
+		$this->removeRegisteredStashFiles( $repo, $cutoff );
+
+		// Delete all the corresponding thumbnails...
+		$dir = $tempRepo->getZonePath( 'thumb' );
+		$iterator = $this->getFileListOrFail( $tempRepo, $dir );
+		$this->output( "Deleting old thumbnails...\n" );
+		$this->deleteFilesOlderThan( $tempRepo, $dir, $iterator, $cutoff );
+
+		// Apparently lots of stash files are not registered in the DB...
+		$dir = $tempRepo->getZonePath( 'public' );
+		$iterator = $this->getFileListOrFail( $tempRepo, $dir );
+		$this->output( "Deleting orphaned temp files...\n" );
+		// Sanity check: this phase deletes every old file in the zone without
+		// consulting the DB, so refuse to run unless the path contains
+		// '/local-temp'.
+		if ( strpos( $dir, '/local-temp' ) === false ) {
+			$this->fatalError( "Temp repo is not using the temp container." );
+		}
+		$this->deleteFilesOlderThan( $tempRepo, $dir, $iterator, $cutoff );
+	}
+
+	/**
+	 * Remove every stash entry whose `us_timestamp` is older than the cutoff.
+	 *
+	 * Keys are read from the replica DB first and then removed one by one
+	 * through UploadStash, so the removal logic stays in one place.
+	 * A failure on a single key is reported and does not stop the run.
+	 *
+	 * @param LocalRepo $repo Repo returned by RepoGroup::getLocalRepo()
+	 * @param int $cutoff UNIX timestamp; older entries are removed
+	 */
+	private function removeRegisteredStashFiles( $repo, $cutoff ) {
+		$dbr = $repo->getReplicaDB();
+
+		$this->output( "Getting list of files to clean up...\n" );
+		$res = $dbr->select(
+			'uploadstash',
+			'us_key',
+			'us_timestamp < ' . $dbr->addQuotes( $dbr->timestamp( $cutoff ) ),
+			__METHOD__
+		);
+
+		if ( $res->numRows() == 0 ) {
+			$this->output( "No stashed files to cleanup according to the DB.\n" );
+			return;
+		}
+
+		// finish the read before starting writes.
+		$keys = [];
+		foreach ( $res as $row ) {
+			$keys[] = $row->us_key;
+		}
+
+		$this->output( 'Removing ' . count( $keys ) . " file(s)...\n" );
+		// this could be done some other, more direct/efficient way, but using
+		// UploadStash's own methods means it's less likely to fall accidentally
+		// out-of-date someday
+		$stash = new UploadStash( $repo );
+
+		$i = 0;
+		foreach ( $keys as $key ) {
+			$i++;
+			try {
+				$stash->getFile( $key, true );
+				$stash->removeFileNoAuth( $key );
+			} catch ( UploadStashException $ex ) {
+				$type = get_class( $ex );
+				$this->output( "Failed removing stashed upload with key: $key ($type)\n" );
+			}
+			// Pause for replication and print progress every 100 keys.
+			if ( $i % 100 == 0 ) {
+				wfWaitForSlaves();
+				$this->output( "$i\n" );
+			}
+		}
+		$this->output( "$i done\n" );
+	}
+
+	/**
+	 * List the files under a storage directory, bailing out via fatalError()
+	 * if the backend cannot produce a listing.
+	 *
+	 * Without this check a null listing would be passed straight to foreach.
+	 *
+	 * @param FileRepo $tempRepo Repo whose backend is queried
+	 * @param string $dir Storage path of the directory to list
+	 * @return Traversable|array File names relative to $dir
+	 */
+	private function getFileListOrFail( FileRepo $tempRepo, $dir ) {
+		$iterator = $tempRepo->getBackend()->getFileList( [ 'dir' => $dir, 'adviseStat' => 1 ] );
+		if ( $iterator === null ) {
+			$this->fatalError( "Could not get file listing." );
+		}
+
+		return $iterator;
+	}
+
+	/**
+	 * Delete, in batches, every listed file whose timestamp is older than the
+	 * cutoff. Prints a running count after each full batch and a final
+	 * "N done" line.
+	 *
+	 * The count reflects operations submitted, not operations that succeeded.
+	 *
+	 * @param FileRepo $tempRepo Repo the files live in
+	 * @param string $dir Storage path the listed names are relative to
+	 * @param Traversable|array $iterator File names relative to $dir
+	 * @param int $cutoff UNIX timestamp; older files are deleted
+	 */
+	private function deleteFilesOlderThan( FileRepo $tempRepo, $dir, $iterator, $cutoff ) {
+		$batchSize = $this->getBatchSize();
+		$i = 0;
+		$batch = []; // operation batch
+		foreach ( $iterator as $file ) {
+			$path = "$dir/$file";
+			if ( wfTimestamp( TS_UNIX, $tempRepo->getFileTimestamp( $path ) ) < $cutoff ) {
+				$batch[] = [ 'op' => 'delete', 'src' => $path ];
+				if ( count( $batch ) >= $batchSize ) {
+					$this->doOperations( $tempRepo, $batch );
+					$i += count( $batch );
+					$batch = [];
+					$this->output( "$i\n" );
+				}
+			}
+		}
+		// Flush whatever is left over from the last partial batch.
+		if ( count( $batch ) ) {
+			$this->doOperations( $tempRepo, $batch );
+			$i += count( $batch );
+		}
+		$this->output( "$i done\n" );
+	}
+
+	/**
+	 * Run a batch of file operations through the repo's backend using
+	 * doQuickOperations(). A non-OK status is reported through error()
+	 * rather than thrown.
+	 *
+	 * @param FileRepo $tempRepo Repo whose backend executes the operations
+	 * @param array $ops List of operation descriptors (here: 'delete' ops)
+	 */
+	protected function doOperations( FileRepo $tempRepo, array $ops ) {
+		$status = $tempRepo->getBackend()->doQuickOperations( $ops );
+		if ( !$status->isOK() ) {
+			$this->error( print_r( $status->getErrorsArray(), true ) );
+		}
+	}
+}
+
+$maintClass = UploadStashCleanup::class; // name of the script class; presumably read by the runner included below
+require_once RUN_MAINTENANCE_IF_MAIN; // path constant defined outside this file (NOTE(review): expected to come from Maintenance.php)