diff options
Diffstat (limited to 'www/wiki/maintenance/cleanupUploadStash.php')
-rw-r--r-- | www/wiki/maintenance/cleanupUploadStash.php | 156 |
1 files changed, 156 insertions, 0 deletions
<?php
/**
 * Remove old or broken uploads from temporary uploaded file storage,
 * clean up associated database records
 *
 * Copyright © 2011, Wikimedia Foundation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Ian Baker <ibaker@wikimedia.org>
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script to remove old or broken uploads from temporary uploaded
 * file storage and clean up associated database records.
 *
 * The script runs three passes, all using the same age cutoff
 * (now minus $wgUploadStashMaxAge):
 *  1. stash files registered in the `uploadstash` table,
 *  2. files in the temp repo's 'thumb' zone,
 *  3. files in the temp repo's 'public' zone.
 *
 * @ingroup Maintenance
 */
class UploadStashCleanup extends Maintenance {

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Clean up abandoned files in temporary uploaded file stash' );
		// Number of backend delete operations submitted per doOperations() call.
		$this->setBatchSize( 50 );
	}

	/**
	 * Entry point: delete expired registered stash files, then expired
	 * thumbnails, then expired files in the temp repo's public zone.
	 */
	public function execute() {
		global $wgUploadStashMaxAge;

		$repo = RepoGroup::singleton()->getLocalRepo();
		$tempRepo = $repo->getTempRepo();

		$dbr = $repo->getReplicaDB();

		// how far back should this look for files to delete?
		$cutoff = time() - $wgUploadStashMaxAge;

		$this->output( "Getting list of files to clean up...\n" );
		$res = $dbr->select(
			'uploadstash',
			'us_key',
			'us_timestamp < ' . $dbr->addQuotes( $dbr->timestamp( $cutoff ) ),
			__METHOD__
		);

		// finish the read before starting writes.
		$keys = [];
		foreach ( $res as $row ) {
			$keys[] = $row->us_key;
		}

		// Delete all registered stash files...
		if ( !$keys ) {
			$this->output( "No stashed files to cleanup according to the DB.\n" );
		} else {
			$this->removeStashedFiles( $repo, $keys );
		}

		// Delete all the corresponding thumbnails...
		$dir = $tempRepo->getZonePath( 'thumb' );
		$iterator = $tempRepo->getBackend()->getFileList( [ 'dir' => $dir, 'adviseStat' => 1 ] );
		$this->output( "Deleting old thumbnails...\n" );
		$this->deleteExpiredFiles( $tempRepo, $dir, $iterator, $cutoff );

		// Apparently lots of stash files are not registered in the DB...
		$dir = $tempRepo->getZonePath( 'public' );
		$iterator = $tempRepo->getBackend()->getFileList( [ 'dir' => $dir, 'adviseStat' => 1 ] );
		$this->output( "Deleting orphaned temp files...\n" );
		// sanity check: refuse to sweep a directory that is not in the temp container
		if ( strpos( $dir, '/local-temp' ) === false ) {
			$this->fatalError( "Temp repo is not using the temp container." );
		}
		$this->deleteExpiredFiles( $tempRepo, $dir, $iterator, $cutoff );
	}

	/**
	 * Remove the stash files identified by the given keys, reporting progress.
	 *
	 * A failure on one key is reported and does not stop the remaining keys
	 * from being processed.
	 *
	 * @param FileRepo $repo Repo the UploadStash is built on
	 * @param string[] $keys Values of us_key to remove
	 */
	private function removeStashedFiles( FileRepo $repo, array $keys ) {
		$this->output( 'Removing ' . count( $keys ) . " file(s)...\n" );
		// this could be done some other, more direct/efficient way, but using
		// UploadStash's own methods means it's less likely to fall accidentally
		// out-of-date someday
		$stash = new UploadStash( $repo );

		$i = 0;
		foreach ( $keys as $key ) {
			$i++;
			try {
				$stash->getFile( $key, true );
				$stash->removeFileNoAuth( $key );
			} catch ( UploadStashException $ex ) {
				$type = get_class( $ex );
				$this->output( "Failed removing stashed upload with key: $key ($type)\n" );
			}
			// Every 100 keys: wait for replication and print a progress marker.
			if ( $i % 100 === 0 ) {
				wfWaitForSlaves();
				$this->output( "$i\n" );
			}
		}
		$this->output( "$i done\n" );
	}

	/**
	 * Delete every listed file under $dir whose timestamp is older than
	 * $cutoff, submitting deletions to the backend in batches of
	 * getBatchSize() and printing a running count after each full batch.
	 *
	 * @param FileRepo $tempRepo Repo whose backend performs the deletions
	 * @param string $dir Storage directory the listing is relative to
	 * @param Traversable|array|null $iterator Result of FileBackend::getFileList()
	 *   for $dir; null is treated as a failed listing
	 * @param int $cutoff UNIX timestamp; files older than this are deleted
	 */
	private function deleteExpiredFiles( FileRepo $tempRepo, $dir, $iterator, $cutoff ) {
		if ( $iterator === null ) {
			// getFileList() may return null on failure; report it instead of
			// letting foreach raise a PHP warning, and delete nothing.
			$this->error( "Could not get a file listing for $dir." );
			$iterator = [];
		}

		$batchSize = $this->getBatchSize();
		$i = 0;
		$batch = []; // operation batch
		foreach ( $iterator as $file ) {
			$path = "$dir/$file";
			if ( wfTimestamp( TS_UNIX, $tempRepo->getFileTimestamp( $path ) ) < $cutoff ) {
				$batch[] = [ 'op' => 'delete', 'src' => $path ];
				if ( count( $batch ) >= $batchSize ) {
					$this->doOperations( $tempRepo, $batch );
					$i += count( $batch );
					$batch = [];
					$this->output( "$i\n" );
				}
			}
		}
		// Flush whatever is left over from the last, partially filled batch.
		if ( $batch ) {
			$this->doOperations( $tempRepo, $batch );
			$i += count( $batch );
		}
		$this->output( "$i done\n" );
	}

	/**
	 * Run a batch of operations on the temp repo's backend and report any
	 * errors; a failed batch does not abort the script.
	 *
	 * @param FileRepo $tempRepo
	 * @param array $ops List of backend operation descriptors
	 */
	protected function doOperations( FileRepo $tempRepo, array $ops ) {
		$status = $tempRepo->getBackend()->doQuickOperations( $ops );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrorsArray(), true ) );
		}
	}
}

$maintClass = UploadStashCleanup::class;
require_once RUN_MAINTENANCE_IF_MAIN;