diff options
Diffstat (limited to 'www/wiki/maintenance/storage/testCompression.php')
-rw-r--r-- | www/wiki/maintenance/storage/testCompression.php | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/www/wiki/maintenance/storage/testCompression.php b/www/wiki/maintenance/storage/testCompression.php new file mode 100644 index 00000000..c3ed4fce --- /dev/null +++ b/www/wiki/maintenance/storage/testCompression.php @@ -0,0 +1,104 @@ +<?php +/** + * Test revision text compression and decompression. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance ExternalStorage + */ + +$optionsWithArgs = [ 'start', 'limit', 'type' ]; +require __DIR__ . '/../commandLine.inc'; + +if ( !isset( $args[0] ) ) { + echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " . + "[--limit=<num-revs>] <page-title>\n"; + exit( 1 ); +} + +$lang = Language::factory( 'en' ); +$title = Title::newFromText( $args[0] ); +if ( isset( $options['start'] ) ) { + $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) ); + echo "Starting from " . $lang->timeanddate( $start ) . "\n"; +} else { + $start = '19700101000000'; +} +if ( isset( $options['limit'] ) ) { + $limit = $options['limit']; + $untilHappy = false; +} else { + $limit = 1000; + $untilHappy = true; +} +$type = isset( $options['type'] ) ? $options['type'] : ConcatenatedGzipHistoryBlob::class; + +$dbr = $this->getDB( DB_REPLICA ); +$revQuery = Revision::getQueryInfo( [ 'page', 'text' ] ); +$res = $dbr->select( + $revQuery['tables'], + $revQuery['fields'], + [ + 'page_namespace' => $title->getNamespace(), + 'page_title' => $title->getDBkey(), + 'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ), + ], + __FILE__, + [ 'LIMIT' => $limit ], + $revQuery['joins'] +); + +$blob = new $type; +$hashes = []; +$keys = []; +$uncompressedSize = 0; +$t = -microtime( true ); +foreach ( $res as $row ) { + $revision = new Revision( $row ); + $text = $revision->getSerializedData(); + $uncompressedSize += strlen( $text ); + $hashes[$row->rev_id] = md5( $text ); + $keys[$row->rev_id] = $blob->addItem( $text ); + if ( $untilHappy && !$blob->isHappy() ) { + break; + } +} + +$serialized = serialize( $blob ); +$t += microtime( true ); +# print_r( $blob->mDiffMap ); + +printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", + $type, + count( $hashes ), + $uncompressedSize / strlen( $serialized ), + $lang->formatSize( $uncompressedSize ), + strlen( $serialized ) +); +printf( "Compression time: %5.2f ms\n", $t * 1000 ); + +$t = -microtime( true ); +$blob = unserialize( $serialized ); +foreach ( $keys as $id => $key ) { + $text = $blob->getItem( $key ); + if ( md5( $text ) != $hashes[$id] ) { + echo "Content hash mismatch for rev_id $id\n"; + # var_dump( $text ); + } +} +$t += microtime( true ); +printf( "Decompression time: %5.2f ms\n", $t * 1000 ); |