diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/utils |
first commit
Diffstat (limited to 'www/wiki/includes/utils')
-rw-r--r-- | www/wiki/includes/utils/AutoloadGenerator.php | 508 | ||||
-rw-r--r-- | www/wiki/includes/utils/AvroValidator.php | 181 | ||||
-rw-r--r-- | www/wiki/includes/utils/BatchRowIterator.php | 296 | ||||
-rw-r--r-- | www/wiki/includes/utils/BatchRowUpdate.php | 128 | ||||
-rw-r--r-- | www/wiki/includes/utils/BatchRowWriter.php | 75 | ||||
-rw-r--r-- | www/wiki/includes/utils/ExecutableFinder.php | 115 | ||||
-rw-r--r-- | www/wiki/includes/utils/FileContentsHasher.php | 114 | ||||
-rw-r--r-- | www/wiki/includes/utils/MWCryptHKDF.php | 103 | ||||
-rw-r--r-- | www/wiki/includes/utils/MWCryptRand.php | 79 | ||||
-rw-r--r-- | www/wiki/includes/utils/MWFileProps.php | 145 | ||||
-rw-r--r-- | www/wiki/includes/utils/MWRestrictions.php | 147 | ||||
-rw-r--r-- | www/wiki/includes/utils/README | 9 | ||||
-rw-r--r-- | www/wiki/includes/utils/RowUpdateGenerator.php | 39 | ||||
-rw-r--r-- | www/wiki/includes/utils/UIDGenerator.php | 629 | ||||
-rw-r--r-- | www/wiki/includes/utils/ZipDirectoryReader.php | 717 | ||||
-rw-r--r-- | www/wiki/includes/utils/ZipDirectoryReaderError.php | 38 |
16 files changed, 3323 insertions, 0 deletions
/**
 * Accepts a list of files and directories to search for php files and
 * produces the autoload map for every class detected in them.
 *
 * getAutoload() returns the generated content as a string: either the
 * updated extension.json/skin.json found in the base path, or the source of
 * an autoload.php assigning $wgAutoloadLocalClasses / $wgAutoloadClasses.
 *
 * Usage:
 *
 *     $gen = new AutoloadGenerator( __DIR__ );
 *     $gen->readDir( __DIR__ . '/includes' );
 *     $gen->readFile( __DIR__ . '/foo.php' );
 *     $gen->getAutoload();
 */
class AutoloadGenerator {
	const FILETYPE_JSON = 'json';
	const FILETYPE_PHP = 'php';

	/**
	 * @var string Root path of the project being scanned for classes
	 */
	protected $basepath;

	/**
	 * @var ClassCollector Helper class extracts class names from php files
	 */
	protected $collector;

	/**
	 * @var array Map of file shortpath to list of FQCN detected within file
	 */
	protected $classes = [];

	/**
	 * @var string The global variable to write output to
	 */
	protected $variableName = 'wgAutoloadClasses';

	/**
	 * @var array Map of FQCN to relative path (from self::$basepath)
	 */
	protected $overrides = [];

	/**
	 * Directories that should be excluded
	 *
	 * @var string[]
	 */
	protected $excludePaths = [];

	/**
	 * @param string $basepath Root path of the project being scanned for classes
	 * @param array|string $flags
	 *
	 *  local - If this flag is set $wgAutoloadLocalClasses will be built instead
	 *          of $wgAutoloadClasses
	 */
	public function __construct( $basepath, $flags = [] ) {
		if ( !is_array( $flags ) ) {
			$flags = [ $flags ];
		}
		$this->basepath = self::normalizePathSeparator( realpath( $basepath ) );
		$this->collector = new ClassCollector;
		if ( in_array( 'local', $flags ) ) {
			$this->variableName = 'wgAutoloadLocalClasses';
		}
	}

	/**
	 * Directories that should be excluded
	 *
	 * @since 1.31
	 * @param string[] $paths
	 */
	public function setExcludePaths( array $paths ) {
		foreach ( $paths as $path ) {
			$this->excludePaths[] = self::normalizePathSeparator( $path );
		}
	}

	/**
	 * Whether the file should be excluded.
	 *
	 * A file is excluded when its path starts with one of the registered
	 * exclude paths (plain string prefix comparison).
	 *
	 * @param string $path File path
	 * @return bool
	 */
	private function shouldExclude( $path ) {
		foreach ( $this->excludePaths as $dir ) {
			if ( strpos( $path, $dir ) === 0 ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Force a class to be autoloaded from a specific path, regardless of where
	 * or if it was detected.
	 *
	 * @param string $fqcn FQCN to force the location of
	 * @param string $inputPath Full path to the file containing the class
	 * @throws Exception When the path does not exist or is outside the basepath
	 */
	public function forceClassPath( $fqcn, $inputPath ) {
		// realpath() returns false for a missing file; check that before
		// handing the value to the string helper.
		$resolved = realpath( $inputPath );
		if ( $resolved === false ) {
			throw new \Exception( "Invalid path: $inputPath" );
		}
		$path = self::normalizePathSeparator( $resolved );
		if ( !$path ) {
			throw new \Exception( "Invalid path: $inputPath" );
		}
		$len = strlen( $this->basepath );
		if ( substr( $path, 0, $len ) !== $this->basepath ) {
			throw new \Exception( "Path is not within basepath: $inputPath" );
		}
		$shortpath = substr( $path, $len );
		$this->overrides[$fqcn] = $shortpath;
	}

	/**
	 * Collect the classes declared in a single php file.
	 *
	 * @param string $inputPath Path to a php file to find classes within
	 * @throws Exception When the path is outside the basepath
	 */
	public function readFile( $inputPath ) {
		// NOTE: do NOT expand $inputPath using realpath(). It is perfectly
		// reasonable for LocalSettings.php and similar files to be symlinks
		// to files that are outside of $this->basepath.
		$inputPath = self::normalizePathSeparator( $inputPath );
		$len = strlen( $this->basepath );
		if ( substr( $inputPath, 0, $len ) !== $this->basepath ) {
			throw new \Exception( "Path is not within basepath: $inputPath" );
		}
		if ( $this->shouldExclude( $inputPath ) ) {
			return;
		}
		$result = $this->collector->getClasses(
			file_get_contents( $inputPath )
		);
		if ( $result ) {
			$shortpath = substr( $inputPath, $len );
			$this->classes[$shortpath] = $result;
		}
	}

	/**
	 * @param string $dir Path to a directory to recursively search
	 *  for php files with either .php or .inc extensions
	 */
	public function readDir( $dir ) {
		$it = new RecursiveDirectoryIterator(
			self::normalizePathSeparator( realpath( $dir ) ) );
		$it = new RecursiveIteratorIterator( $it );

		foreach ( $it as $path => $file ) {
			$ext = pathinfo( $path, PATHINFO_EXTENSION );
			// some older files in mw use .inc
			if ( $ext === 'php' || $ext === 'inc' ) {
				$this->readFile( $path );
			}
		}
	}

	/**
	 * Builds the content of the registration file with its AutoloadClasses
	 * field regenerated. Nothing is written to disk here.
	 *
	 * @param string $filename Filename of JSON
	 *  extension/skin registration file
	 * @return string Updated Json of the file given as the $filename parameter
	 */
	protected function generateJsonAutoload( $filename ) {
		$key = 'AutoloadClasses';
		$json = FormatJson::decode( file_get_contents( $filename ), true );
		unset( $json[$key] );
		// Inverting the key-value pairs so that they become of the
		// format class-name : path when they get converted into json.
		foreach ( $this->classes as $path => $contained ) {
			foreach ( $contained as $fqcn ) {
				// Using substr to remove the leading '/'
				$json[$key][$fqcn] = substr( $path, 1 );
			}
		}
		// $this->overrides is keyed by FQCN (see forceClassPath()), exactly
		// as consumed in generatePHPAutoload().
		foreach ( $this->overrides as $fqcn => $path ) {
			// Using substr to remove the leading '/'
			$json[$key][$fqcn] = substr( $path, 1 );
		}

		// Sorting the list of autoload classes. The key is absent when no
		// class was collected, in which case there is nothing to sort.
		if ( isset( $json[$key] ) ) {
			ksort( $json[$key] );
		}

		// Return the whole JSON file
		return FormatJson::encode( $json, "\t", FormatJson::ALL_OK ) . "\n";
	}

	/**
	 * Generates a PHP file setting up autoload information.
	 *
	 * @param string $commandName Command name to include in comment
	 * @param string $filename of PHP file to put autoload information in.
	 * @return string
	 */
	protected function generatePHPAutoload( $commandName, $filename ) {
		// No existing JSON file found; update/generate PHP file
		$content = [];

		// We need to generate a line each rather than exporting the
		// full array so __DIR__ can be prepended to all the paths
		$format = "%s => __DIR__ . %s,";
		foreach ( $this->classes as $path => $contained ) {
			$exportedPath = var_export( $path, true );
			foreach ( $contained as $fqcn ) {
				$content[$fqcn] = sprintf(
					$format,
					var_export( $fqcn, true ),
					$exportedPath
				);
			}
		}

		foreach ( $this->overrides as $fqcn => $path ) {
			$content[$fqcn] = sprintf(
				$format,
				var_export( $fqcn, true ),
				var_export( $path, true )
			);
		}

		// sort for stable output
		ksort( $content );

		// extensions using this generator are appending to the existing
		// autoload.
		if ( $this->variableName === 'wgAutoloadClasses' ) {
			$op = '+=';
		} else {
			$op = '=';
		}

		$output = implode( "\n\t", $content );
		// The first entry is indented with an explicit \t escape so the
		// generated file does not depend on whitespace in this source file.
		return <<<EOD
<?php
// This file is generated by $commandName, do not adjust manually
// phpcs:disable Generic.Files.LineLength
global \${$this->variableName};

\${$this->variableName} {$op} [
\t{$output}
];

EOD;
	}

	/**
	 * Returns all known classes as a string, which can be used to put into a target
	 * file (e.g. extension.json, skin.json or autoload.php)
	 *
	 * @param string $commandName Value used in file comment to direct
	 *  developers towards the appropriate way to update the autoload.
	 * @return string
	 */
	public function getAutoload( $commandName = 'AutoloadGenerator' ) {
		// We need to check whether an extension.json or skin.json exists or not,
		// and in case it doesn't, update the autoload.php file.

		$fileinfo = $this->getTargetFileinfo();

		if ( $fileinfo['type'] === self::FILETYPE_JSON ) {
			return $this->generateJsonAutoload( $fileinfo['filename'] );
		} else {
			return $this->generatePHPAutoload( $commandName, $fileinfo['filename'] );
		}
	}

	/**
	 * Returns the filename of the extension.json or skin.json, if there's any, or
	 * otherwise the path to the autoload.php file in an array as the "filename"
	 * key and with the type (AutoloadGenerator::FILETYPE_JSON or AutoloadGenerator::FILETYPE_PHP)
	 * of the file as the "type" key.
	 *
	 * @return array
	 */
	public function getTargetFileinfo() {
		$fileinfo = [
			'filename' => $this->basepath . '/autoload.php',
			'type' => self::FILETYPE_PHP
		];
		if ( file_exists( $this->basepath . '/extension.json' ) ) {
			$fileinfo = [
				'filename' => $this->basepath . '/extension.json',
				'type' => self::FILETYPE_JSON
			];
		} elseif ( file_exists( $this->basepath . '/skin.json' ) ) {
			$fileinfo = [
				'filename' => $this->basepath . '/skin.json',
				'type' => self::FILETYPE_JSON
			];
		}

		return $fileinfo;
	}

	/**
	 * Ensure that Unix-style path separators ("/") are used in the path.
	 *
	 * @param string $path
	 * @return string
	 */
	protected static function normalizePathSeparator( $path ) {
		return str_replace( '\\', '/', $path );
	}

	/**
	 * Initialize the source files and directories which are used for the MediaWiki default
	 * autoloader in {mw-base-dir}/autoload.php including:
	 *  * includes/
	 *  * languages/
	 *  * maintenance/
	 *  * mw-config/
	 *  * /*.php
	 */
	public function initMediaWikiDefault() {
		foreach ( [ 'includes', 'languages', 'maintenance', 'mw-config' ] as $dir ) {
			$this->readDir( $this->basepath . '/' . $dir );
		}
		// glob() returns false on error; treat that as "no files".
		$rootFiles = glob( $this->basepath . '/*.php' );
		foreach ( $rootFiles ?: [] as $file ) {
			$this->readFile( $file );
		}
	}
}

/**
 * Reads PHP code and returns the FQCN of every class defined within it.
 *
 * The token stream from token_get_all() is walked once. A token of interest
 * (namespace, class, interface, trait, "::" or a class_alias call) opens an
 * "expect sequence"; following tokens are fed to tryEndExpect() until the
 * sequence is resolved and $startToken is reset to null.
 */
class ClassCollector {

	/**
	 * @var string Current namespace
	 */
	protected $namespace = '';

	/**
	 * @var array List of FQCN detected in this pass
	 */
	protected $classes;

	/**
	 * @var array Token from token_get_all() that started an expect sequence
	 */
	protected $startToken;

	/**
	 * @var array List of tokens that are members of the current expect sequence
	 */
	protected $tokens;

	/**
	 * @var array Class alias with target/name fields
	 */
	protected $alias;

	/**
	 * @param string $code PHP code (including <?php) to detect class names from
	 * @return array List of FQCN detected within the tokens
	 */
	public function getClasses( $code ) {
		$this->namespace = '';
		$this->classes = [];
		$this->startToken = null;
		$this->alias = null;
		$this->tokens = [];

		foreach ( token_get_all( $code ) as $token ) {
			if ( $this->startToken === null ) {
				$this->tryBeginExpect( $token );
			} else {
				$this->tryEndExpect( $token );
			}
		}

		return $this->classes;
	}

	/**
	 * Determine if $token begins the next expect sequence.
	 *
	 * @param array|string $token
	 */
	protected function tryBeginExpect( $token ) {
		if ( is_string( $token ) ) {
			return;
		}
		// Note: When changing class name discovery logic,
		// AutoLoaderTest.php may also need to be updated.
		switch ( $token[0] ) {
			case T_NAMESPACE:
			case T_CLASS:
			case T_INTERFACE:
			case T_TRAIT:
			case T_DOUBLE_COLON:
				$this->startToken = $token;
				break;
			case T_STRING:
				if ( $token[1] === 'class_alias' ) {
					$this->startToken = $token;
					$this->alias = [];
				}
		}
	}

	/**
	 * Accepts the next token in an expect sequence
	 *
	 * @param array|string $token
	 */
	protected function tryEndExpect( $token ) {
		switch ( $this->startToken[0] ) {
			case T_DOUBLE_COLON:
				// Skip over T_CLASS after T_DOUBLE_COLON because this is something like
				// "self::class" which accesses the class name. It doesn't define a new class.
				$this->startToken = null;
				break;
			case T_NAMESPACE:
				if ( $token === ';' || $token === '{' ) {
					$this->namespace = $this->implodeTokens() . '\\';
				} else {
					$this->tokens[] = $token;
				}
				break;

			case T_STRING:
				if ( $this->alias !== null ) {
					// Flow 1 - Two string literals:
					// - T_STRING  class_alias
					// - '('
					// - T_CONSTANT_ENCAPSED_STRING 'TargetClass'
					// - ','
					// - T_WHITESPACE
					// - T_CONSTANT_ENCAPSED_STRING 'AliasName'
					// - ')'
					// Flow 2 - Use of ::class syntax for first parameter
					// - T_STRING  class_alias
					// - '('
					// - T_STRING TargetClass
					// - T_DOUBLE_COLON ::
					// - T_CLASS class
					// - ','
					// - T_WHITESPACE
					// - T_CONSTANT_ENCAPSED_STRING 'AliasName'
					// - ')'
					$sawFirstArg = isset( $this->alias['target'] )
						&& $this->alias['target'] === true;
					if ( $token === '(' ) {
						// Start of a function call to class_alias()
						$this->alias = [ 'target' => false, 'name' => false ];
					} elseif ( $token === ',' ) {
						// Record that we're past the first parameter
						if ( isset( $this->alias['target'] ) && $this->alias['target'] === false ) {
							$this->alias['target'] = true;
						}
					} elseif ( is_array( $token ) && $token[0] === T_CONSTANT_ENCAPSED_STRING ) {
						if ( $sawFirstArg ) {
							// We already saw a first argument, this must be the second.
							// Strip quotes from the string literal.
							$this->alias['name'] = substr( $token[1], 1, -1 );
						}
					} elseif ( $token === ')' ) {
						// End of function call. Only record the alias when a
						// string literal name was actually seen; otherwise
						// (e.g. "Bar::class" as second argument) a bare
						// `false` would end up in the class list.
						if ( isset( $this->alias['name'] ) && $this->alias['name'] !== false ) {
							$this->classes[] = $this->alias['name'];
						}
						$this->alias = null;
						$this->startToken = null;
					} elseif ( !is_array( $token ) || (
						$token[0] !== T_STRING &&
						$token[0] !== T_DOUBLE_COLON &&
						$token[0] !== T_CLASS &&
						$token[0] !== T_WHITESPACE
					) ) {
						// Ignore this call to class_alias() - compat/Timestamp.php
						$this->alias = null;
						$this->startToken = null;
					}
				}
				break;

			case T_CLASS:
			case T_INTERFACE:
			case T_TRAIT:
				$this->tokens[] = $token;
				if ( is_array( $token ) && $token[0] === T_STRING ) {
					$this->classes[] = $this->namespace . $this->implodeTokens();
				}
		}
	}

	/**
	 * Returns the string representation of the tokens within the
	 * current expect sequence and resets the sequence.
	 *
	 * @return string
	 */
	protected function implodeTokens() {
		$content = [];
		foreach ( $this->tokens as $token ) {
			$content[] = is_string( $token ) ? $token : $token[1];
		}

		$this->tokens = [];
		$this->startToken = null;

		return trim( implode( '', $content ), " \n\t" );
	}
}
/**
 * Generate error strings for data that doesn't match the specified
 * Avro schema. This is very similar to AvroSchema::is_valid_datum(),
 * but returns error messages instead of a boolean.
 *
 * @since 1.26
 * @author Erik Bernhardson <ebernhardson@wikimedia.org>
 * @copyright © 2015 Erik Bernhardson and Wikimedia Foundation.
 */
class AvroValidator {
	/**
	 * Recursively validate $datum against $schema.
	 *
	 * Scalar mismatches yield a single message string. Container schemas
	 * (array, map, record) yield an array of the nested results, keyed by
	 * map key / field name where applicable.
	 *
	 * @param AvroSchema $schema The rules to conform to.
	 * @param mixed $datum The value to validate against $schema.
	 * @return string|string[] An error or list of errors in the
	 *  provided $datum. When no errors exist the empty array is
	 *  returned.
	 */
	public static function getErrors( AvroSchema $schema, $datum ) {
		switch ( $schema->type ) {
			case AvroSchema::NULL_TYPE:
				if ( !is_null( $datum ) ) {
					return self::wrongType( 'null', $datum );
				}
				return [];
			case AvroSchema::BOOLEAN_TYPE:
				if ( !is_bool( $datum ) ) {
					return self::wrongType( 'boolean', $datum );
				}
				return [];
			case AvroSchema::STRING_TYPE:
			case AvroSchema::BYTES_TYPE:
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				return [];
			case AvroSchema::INT_TYPE:
				if ( !is_int( $datum ) ) {
					return self::wrongType( 'integer', $datum );
				}
				if ( AvroSchema::INT_MIN_VALUE > $datum
					|| $datum > AvroSchema::INT_MAX_VALUE
				) {
					return self::outOfRange(
						AvroSchema::INT_MIN_VALUE,
						AvroSchema::INT_MAX_VALUE,
						$datum
					);
				}
				return [];
			case AvroSchema::LONG_TYPE:
				if ( !is_int( $datum ) ) {
					return self::wrongType( 'integer', $datum );
				}
				if ( AvroSchema::LONG_MIN_VALUE > $datum
					|| $datum > AvroSchema::LONG_MAX_VALUE
				) {
					return self::outOfRange(
						AvroSchema::LONG_MIN_VALUE,
						AvroSchema::LONG_MAX_VALUE,
						$datum
					);
				}
				return [];
			case AvroSchema::FLOAT_TYPE:
			case AvroSchema::DOUBLE_TYPE:
				if ( !is_float( $datum ) && !is_int( $datum ) ) {
					return self::wrongType( 'float or integer', $datum );
				}
				return [];
			case AvroSchema::ARRAY_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $datum as $d ) {
					$result = self::getErrors( $schema->items(), $d );
					if ( $result ) {
						$errors[] = $result;
					}
				}
				return $errors;
			case AvroSchema::MAP_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $datum as $k => $v ) {
					if ( !is_string( $k ) ) {
						$errors[] = self::wrongType( 'string key', $k );
					}
					$result = self::getErrors( $schema->values(), $v );
					if ( $result ) {
						$errors[$k] = $result;
					}
				}
				return $errors;
			case AvroSchema::UNION_SCHEMA:
				$errors = [];
				// A dedicated loop variable keeps $schema pointing at the
				// union itself instead of its last member.
				foreach ( $schema->schemas() as $memberSchema ) {
					$result = self::getErrors( $memberSchema, $datum );
					if ( !$result ) {
						// One matching member is enough for a union.
						return [];
					}
					$errors[] = $result;
				}
				if ( $errors ) {
					return [ "Expected any one of these to be true", $errors ];
				}
				return "No schemas provided to union";
			case AvroSchema::ENUM_SCHEMA:
				// Fetch the symbols once through the accessor method; the
				// list is needed for both the check and the message.
				$symbolList = $schema->symbols();
				// Strict comparison: enum symbols are strings, so loose
				// matches such as 0 == 'A' must not count as valid.
				if ( !in_array( $datum, $symbolList, true ) ) {
					$symbols = implode( ', ', $symbolList );
					return "Expected one of $symbols but recieved $datum";
				}
				return [];
			case AvroSchema::FIXED_SCHEMA:
				if ( !is_string( $datum ) ) {
					return self::wrongType( 'string', $datum );
				}
				$len = strlen( $datum );
				if ( $len !== $schema->size() ) {
					return "Expected string of length {$schema->size()}, "
						. "but recieved one of length $len";
				}
				return [];
			case AvroSchema::RECORD_SCHEMA:
			case AvroSchema::ERROR_SCHEMA:
			case AvroSchema::REQUEST_SCHEMA:
				if ( !is_array( $datum ) ) {
					return self::wrongType( 'array', $datum );
				}
				$errors = [];
				foreach ( $schema->fields() as $field ) {
					$name = $field->name();
					if ( !array_key_exists( $name, $datum ) ) {
						$errors[$name] = 'Missing expected field';
						continue;
					}
					$result = self::getErrors( $field->type(), $datum[$name] );
					if ( $result ) {
						$errors[$name] = $result;
					}
				}
				return $errors;
			default:
				return "Unknown avro schema type: {$schema->type}";
		}
	}

	/**
	 * @param mixed $datum
	 * @return string Class name for objects, gettype() result otherwise
	 */
	public static function typeOf( $datum ) {
		return is_object( $datum ) ? get_class( $datum ) : gettype( $datum );
	}

	/**
	 * @param string $expected Description of the expected type
	 * @param mixed $datum The offending value
	 * @return string Error message
	 */
	public static function wrongType( $expected, $datum ) {
		return "Expected $expected, but recieved " . self::typeOf( $datum );
	}

	/**
	 * @param int $min Lowest allowed value
	 * @param int $max Highest allowed value
	 * @param int $datum The offending value
	 * @return string Error message
	 */
	public static function outOfRange( $min, $max, $datum ) {
		return "Expected value between $min and $max, but recieved $datum";
	}
}
/**
 * Pages through a large result set in fixed-size batches.
 *
 * Each iteration step yields one batch (an array of rows). Wrapping the
 * iterator in a RecursiveIteratorIterator flattens the batches into a
 * single stream of rows. Paging is keyset based: the primary key values of
 * the last row of a batch become the lower bound of the next query.
 *
 * @ingroup Maintenance
 */
class BatchRowIterator implements RecursiveIterator {

	/**
	 * @var IDatabase $db The database to read from
	 */
	protected $db;

	/**
	 * @var string|array $table The name or names of the table to read from
	 */
	protected $table;

	/**
	 * @var array $primaryKey The name of the primary key(s)
	 */
	protected $primaryKey;

	/**
	 * @var int $batchSize The number of rows to fetch per iteration
	 */
	protected $batchSize;

	/**
	 * @var array $conditions Array of strings containing SQL conditions
	 *  to add to the query
	 */
	protected $conditions = [];

	/**
	 * @var array $joinConditions
	 */
	protected $joinConditions = [];

	/**
	 * @var array $fetchColumns List of column names to select from the
	 *  table suitable for use with IDatabase::select()
	 */
	protected $fetchColumns;

	/**
	 * @var string $orderBy SQL Order by condition generated from $this->primaryKey
	 */
	protected $orderBy;

	/**
	 * @var array $current The current iterator value
	 */
	private $current = [];

	/**
	 * @var int key 0-indexed number of pages fetched since self::rewind()
	 */
	private $key;

	/**
	 * @var array Additional query options
	 */
	protected $options = [];

	/**
	 * @param IDatabase $db The database to read from
	 * @param string|array $table The name or names of the table to read from
	 * @param string|array $primaryKey The name or names of the primary key columns
	 * @param int $batchSize The number of rows to fetch per iteration
	 * @throws InvalidArgumentException When $batchSize is below 1
	 */
	public function __construct( IDatabase $db, $table, $primaryKey, $batchSize ) {
		if ( $batchSize < 1 ) {
			throw new InvalidArgumentException( 'Batch size must be at least 1 row.' );
		}
		$keyColumns = (array)$primaryKey;

		$this->db = $db;
		$this->table = $table;
		$this->batchSize = $batchSize;
		$this->primaryKey = $keyColumns;
		// Until setFetchColumns() is called only the key columns are read.
		$this->fetchColumns = $keyColumns;
		$this->orderBy = implode( ' ASC,', $keyColumns ) . ' ASC';
	}

	/**
	 * @param array $conditions Query conditions suitable for use with
	 *  IDatabase::select
	 */
	public function addConditions( array $conditions ) {
		$this->conditions = array_merge( $this->conditions, $conditions );
	}

	/**
	 * @param array $options Query options suitable for use with
	 *  IDatabase::select
	 */
	public function addOptions( array $options ) {
		$this->options = array_merge( $this->options, $options );
	}

	/**
	 * @param array $conditions Query join conditions suitable for use
	 *  with IDatabase::select
	 */
	public function addJoinConditions( array $conditions ) {
		$this->joinConditions = array_merge( $this->joinConditions, $conditions );
	}

	/**
	 * @param array $columns List of column names to select from the
	 *  table suitable for use with IDatabase::select()
	 */
	public function setFetchColumns( array $columns ) {
		// The lone all-columns selector is kept untouched.
		if ( array_values( $columns ) === [ '*' ] ) {
			$this->fetchColumns = $columns;
			return;
		}

		// Anything else must also carry the key columns needed for paging.
		$withKeys = array_merge( $this->primaryKey, $columns );
		$this->fetchColumns = array_unique( $withKeys );
	}

	/**
	 * Extracts the primary key(s) from a database row.
	 *
	 * @param stdClass $row An individual database row from this iterator
	 * @return array Map of primary key column to value within the row
	 */
	public function extractPrimaryKeys( $row ) {
		$keyValues = [];
		foreach ( $this->primaryKey as $alias => $column ) {
			// A non-numeric array key is the alias the column is selected under.
			$field = is_numeric( $alias ) ? $column : $alias;
			$keyValues[$field] = $row->{$field};
		}
		return $keyValues;
	}

	/**
	 * @return array The most recently fetched set of rows from the database
	 */
	public function current() {
		return $this->current;
	}

	/**
	 * @return int 0-indexed count of the page number fetched
	 */
	public function key() {
		return $this->key;
	}

	/**
	 * Reset the iterator to the beginning of the table.
	 */
	public function rewind() {
		$this->current = [];
		// self::next() increments this to 0 for the first page
		$this->key = -1;
		$this->next();
	}

	/**
	 * @return bool True when the iterator is in a valid state
	 */
	public function valid() {
		return (bool)$this->current;
	}

	/**
	 * @return bool True when this result set has rows
	 */
	public function hasChildren() {
		return count( $this->current ) > 0;
	}

	/**
	 * @return RecursiveIterator
	 */
	public function getChildren() {
		$rows = new ArrayIterator( $this->current );
		return new NotRecursiveIterator( $rows );
	}

	/**
	 * Fetch the next set of rows from the database.
	 */
	public function next() {
		// LIMIT and ORDER BY take precedence over caller supplied options.
		$queryOptions = [
			'LIMIT' => $this->batchSize,
			'ORDER BY' => $this->orderBy,
		] + $this->options;

		$result = $this->db->select(
			$this->table,
			$this->fetchColumns,
			$this->buildConditions(),
			__METHOD__,
			$queryOptions,
			$this->joinConditions
		);

		// Materialised as an array: self::current() returns it and
		// self::buildConditions() needs its final row.
		$this->current = iterator_to_array( $result );
		$this->key++;
	}

	/**
	 * Uses the primary key list and the maximal result row from the
	 * previous iteration to build an SQL condition sufficient for
	 * selecting the next page of results. All except the final key use
	 * `=` conditions while the final key uses a `>` condition
	 *
	 * Example output:
	 *     [ '( foo = 42 AND bar > 7 ) OR ( foo > 42 )' ]
	 *
	 * @return array The SQL conditions necessary to select the next set
	 *  of rows in the batched query
	 */
	protected function buildConditions() {
		if ( !$this->current ) {
			// First page: nothing to continue from.
			return $this->conditions;
		}

		$lastRow = end( $this->current );
		$quotedMaxima = [];
		foreach ( $this->primaryKey as $alias => $column ) {
			$field = is_numeric( $alias ) ? $column : $alias;
			$quotedMaxima[$column] = $this->db->addQuotes( $lastRow->{$field} );
		}

		// With three key columns this produces, in order:
		//   col1 = 4 AND col2 = 7 AND col3 > 1
		//   col1 = 4 AND col2 > 7
		//   col1 > 4
		$alternatives = [];
		for ( $width = count( $quotedMaxima ); $width > 0; $width-- ) {
			$alternatives[] = $this->buildGreaterThanCondition(
				array_slice( $quotedMaxima, 0, $width, true )
			);
		}

		$result = $this->conditions;
		$result[] = sprintf( '( %s )', implode( ' ) OR ( ', $alternatives ) );

		return $result;
	}

	/**
	 * Given an array of column names and their maximum value generate
	 * an SQL condition where all keys except the last match $quotedMaximumValues
	 * exactly and the last column is greater than the matching value in
	 * $quotedMaximumValues
	 *
	 * @param array $quotedMaximumValues The maximum values quoted with
	 *  $this->db->addQuotes()
	 * @return string An SQL condition that will select rows where all
	 *  columns match the maximum value exactly except the last column
	 *  which must be greater than the provided maximum value
	 */
	protected function buildGreaterThanCondition( array $quotedMaximumValues ) {
		$columns = array_keys( $quotedMaximumValues );
		$lastColumn = end( $columns );
		$lastValue = array_pop( $quotedMaximumValues );

		// Whatever remains after the pop is matched exactly.
		$parts = array_map(
			function ( $column, $value ) {
				return "$column = $value";
			},
			array_keys( $quotedMaximumValues ),
			$quotedMaximumValues
		);
		$parts[] = "$lastColumn > $lastValue";

		return implode( ' AND ', $parts );
	}
}
To use create a class + * implementing the RowUpdateGenerator interface and configure the + * BatchRowIterator and BatchRowWriter for access to the correct table. + * The components will handle reading, writing, and waiting for replica DBs + * while the generator implementation handles generating update arrays + * for singular rows. + * + * Instantiate: + * $updater = new BatchRowUpdate( + * new BatchRowIterator( $dbr, 'some_table', 'primary_key_column', 500 ), + * new BatchRowWriter( $dbw, 'some_table', 'clusterName' ), + * new MyImplementationOfRowUpdateGenerator + * ); + * + * Run: + * $updater->execute(); + * + * An example maintenance script utilizing the BatchRowUpdate can be + * located in the Echo extension file maintenance/updateSchema.php + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
/**
 * Glue between a batch reader, a row update generator and a batch writer.
 *
 * For every batch produced by the reader each row is handed to the
 * generator; the non-empty updates are collected together with the row's
 * primary key and passed to the writer one batch at a time.
 *
 * @ingroup Maintenance
 */
class BatchRowUpdate {
	/**
	 * @var BatchRowIterator $reader Iterator that returns an array of
	 *  database rows
	 */
	protected $reader;

	/**
	 * @var BatchRowWriter $writer Writer capable of pushing row updates
	 *  to the database
	 */
	protected $writer;

	/**
	 * @var RowUpdateGenerator $generator Generates single row updates
	 *  based on the rows content
	 */
	protected $generator;

	/**
	 * @var callable $output Output callback
	 */
	protected $output;

	/**
	 * @param BatchRowIterator $reader Iterator that returns an
	 *  array of database rows
	 * @param BatchRowWriter $writer Writer capable of pushing
	 *  row updates to the database
	 * @param RowUpdateGenerator $generator Generates single row updates
	 *  based on the rows content
	 */
	public function __construct(
		BatchRowIterator $reader, BatchRowWriter $writer, RowUpdateGenerator $generator
	) {
		// Status messages are discarded until setOutput() installs a callback.
		$this->output = function () {
		};
		$this->generator = $generator;
		$this->writer = $writer;
		$this->reader = $reader;
	}

	/**
	 * Runs the batch update process
	 */
	public function execute() {
		foreach ( $this->reader as $batch ) {
			$pending = [];
			foreach ( $batch as $row ) {
				$changes = $this->generator->update( $row );
				if ( !$changes ) {
					// The generator has nothing to change for this row.
					continue;
				}
				$pending[] = [
					'primaryKey' => $this->reader->extractPrimaryKeys( $row ),
					'changes' => $changes,
				];
			}

			if ( !$pending ) {
				continue;
			}
			$this->output( "Processing " . count( $pending ) . " rows\n" );
			$this->writer->write( $pending );
		}

		$this->output( "Completed\n" );
	}

	/**
	 * Accepts a callable which will receive a single parameter
	 * containing string status updates
	 *
	 * @param callable $output A callback taking a single string
	 *  parameter to output
	 */
	public function setOutput( callable $output ) {
		$this->output = $output;
	}

	/**
	 * Write out a status update
	 *
	 * @param string $text The value to print
	 */
	protected function output( $text ) {
		$callback = $this->output;
		$callback( $text );
	}
}
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +use MediaWiki\MediaWikiServices; +use Wikimedia\Rdbms\IDatabase; + +class BatchRowWriter { + /** + * @var IDatabase $db The database to write to + */ + protected $db; + + /** + * @var string $table The name of the table to update + */ + protected $table; + + /** + * @var string $clusterName A cluster name valid for use with LBFactory + */ + protected $clusterName; + + /** + * @param IDatabase $db The database to write to + * @param string $table The name of the table to update + * @param string|bool $clusterName A cluster name valid for use with LBFactory + */ + public function __construct( IDatabase $db, $table, $clusterName = false ) { + $this->db = $db; + $this->table = $table; + $this->clusterName = $clusterName; + } + + /** + * @param array $updates Array of arrays each containing two keys, 'primaryKey' + * and 'changes'. primaryKey must contain a map of column names to values + * sufficient to uniquely identify the row; changes must contain a map of column + * names to update values to apply to the row. 
+ */ + public function write( array $updates ) { + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ ); + + foreach ( $updates as $update ) { + $this->db->update( + $this->table, + $update['changes'], + $update['primaryKey'], + __METHOD__ + ); + } + + $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket ); + } +} diff --git a/www/wiki/includes/utils/ExecutableFinder.php b/www/wiki/includes/utils/ExecutableFinder.php new file mode 100644 index 00000000..78b3f8e2 --- /dev/null +++ b/www/wiki/includes/utils/ExecutableFinder.php @@ -0,0 +1,115 @@ +<?php +/** + * Copyright (C) 2017 Kunal Mehta <legoktm@member.fsf.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +use MediaWiki\Shell\Shell; + +/** + * Utility class to find executables in likely places + * + * @since 1.31 + */ +class ExecutableFinder { + + /** + * Get an array of likely places we can find executables. Check a bunch + * of known Unix-like defaults, as well as the PATH environment variable + * (which should maybe make it work for Windows?) 
+ * + * @return array + */ + protected static function getPossibleBinPaths() { + return array_unique( array_merge( + [ '/usr/bin', '/bin', '/usr/local/bin', '/opt/csw/bin', + '/usr/gnu/bin', '/usr/sfw/bin', '/sw/bin', '/opt/local/bin' ], + explode( PATH_SEPARATOR, getenv( 'PATH' ) ) + ) ); + } + + /** + * Search a path for any of the given executable names. Returns the + * executable name if found. Also checks the version string returned + * by each executable. + * + * Used only by environment checks. + * + * @param string $path Path to search + * @param string $name Executable name to look for + * @param array|bool $versionInfo False or array with two members: + * 0 => Parameter to pass to binary for version check (e.g. --version) + * 1 => String to compare the output with + * + * If $versionInfo is not false, only executables with a version + * matching $versionInfo[1] will be returned. + * @return bool|string + */ + protected static function findExecutable( $path, $name, $versionInfo = false ) { + $command = $path . DIRECTORY_SEPARATOR . $name; + + Wikimedia\suppressWarnings(); + $file_exists = is_executable( $command ); + Wikimedia\restoreWarnings(); + + if ( $file_exists ) { + if ( !$versionInfo ) { + return $command; + } + + $output = Shell::command( $command, $versionInfo[0] ) + ->includeStderr()->execute()->getStdout(); + if ( strstr( $output, $versionInfo[1] ) !== false ) { + return $command; + } + } + + return false; + } + + /** + * Same as locateExecutable(), but checks in getPossibleBinPaths() by default + * @see locateExecutable() + * @param string|string[] $names Array of possible names. + * @param array|bool $versionInfo Default: false or array with two members: + * 0 => Parameter to run for version check, e.g. '--version' + * 1 => String to compare the output with + * + * If $versionInfo is not false, only executables with a version + * matching $versionInfo[1] will be returned. 
+ * @return bool|string + */ + public static function findInDefaultPaths( $names, $versionInfo = false ) { + if ( Shell::isDisabled() ) { + // If we can't shell out, there's no point looking for executables + return false; + } + + $paths = self::getPossibleBinPaths(); + foreach ( (array)$names as $name ) { + foreach ( $paths as $path ) { + $exe = self::findExecutable( $path, $name, $versionInfo ); + if ( $exe !== false ) { + return $exe; + } + } + } + + return false; + } + +} diff --git a/www/wiki/includes/utils/FileContentsHasher.php b/www/wiki/includes/utils/FileContentsHasher.php new file mode 100644 index 00000000..e390f217 --- /dev/null +++ b/www/wiki/includes/utils/FileContentsHasher.php @@ -0,0 +1,114 @@ +<?php +/** + * Generate hash digests of file contents to help with cache invalidation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +class FileContentsHasher { + + /** @var BagOStuff */ + protected $cache; + + /** @var FileContentsHasher */ + private static $instance; + + public function __construct() { + $this->cache = ObjectCache::getLocalServerInstance( 'hash' ); + } + + /** + * Get the singleton instance of this class. 
+ * + * @return FileContentsHasher + */ + public static function singleton() { + if ( !self::$instance ) { + self::$instance = new self; + } + + return self::$instance; + } + + /** + * Get a hash of a file's contents, either by retrieving a previously- + * computed hash from the cache, or by computing a hash from the file. + * + * @private + * @param string $filePath Full path to the file. + * @param string $algo Name of selected hashing algorithm. + * @return string|bool Hash of file contents, or false if the file could not be read. + */ + public function getFileContentsHashInternal( $filePath, $algo = 'md4' ) { + $mtime = filemtime( $filePath ); + if ( $mtime === false ) { + return false; + } + + $cacheKey = $this->cache->makeGlobalKey( __CLASS__, $filePath, $mtime, $algo ); + $hash = $this->cache->get( $cacheKey ); + + if ( $hash ) { + return $hash; + } + + $contents = file_get_contents( $filePath ); + if ( $contents === false ) { + return false; + } + + $hash = hash( $algo, $contents ); + $this->cache->set( $cacheKey, $hash, 60 * 60 * 24 ); // 24h + + return $hash; + } + + /** + * Get a hash of the combined contents of one or more files, either by + * retrieving a previously-computed hash from the cache, or by computing + * a hash from the files. + * + * @param string|string[] $filePaths One or more file paths. + * @param string $algo Name of selected hashing algorithm. + * @return string|bool Hash of files' contents, or false if no file could be read. 
+ */ + public static function getFileContentsHash( $filePaths, $algo = 'md4' ) { + $instance = self::singleton(); + + if ( !is_array( $filePaths ) ) { + $filePaths = (array)$filePaths; + } + + Wikimedia\suppressWarnings(); + + if ( count( $filePaths ) === 1 ) { + $hash = $instance->getFileContentsHashInternal( $filePaths[0], $algo ); + Wikimedia\restoreWarnings(); + return $hash; + } + + sort( $filePaths ); + $hashes = array_map( function ( $filePath ) use ( $instance, $algo ) { + return $instance->getFileContentsHashInternal( $filePath, $algo ) ?: ''; + }, $filePaths ); + + Wikimedia\restoreWarnings(); + + $hashes = implode( '', $hashes ); + return $hashes ? hash( $algo, $hashes ) : false; + } +} diff --git a/www/wiki/includes/utils/MWCryptHKDF.php b/www/wiki/includes/utils/MWCryptHKDF.php new file mode 100644 index 00000000..1c8d4861 --- /dev/null +++ b/www/wiki/includes/utils/MWCryptHKDF.php @@ -0,0 +1,103 @@ +<?php +/** + * Extract-and-Expand Key Derivation Function (HKDF). A cryptographically + * secure key expansion function based on RFC 5869. + * + * This relies on the secrecy of $wgSecretKey (by default), or $wgHKDFSecret. + * By default, sha256 is used as the underlying hashing algorithm, but any other + * algorithm can be used. Finding the secret key from the output would require + * an attacker to discover the input key (the PRK) to the hmac that generated + * the output, and discover the particular data, hmac'ed with an evolving key + * (salt), to produce the PRK. Even with md5, no publicly known attacks make + * this currently feasible. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Chris Steipp + * @file + */ + +use MediaWiki\MediaWikiServices; + +class MWCryptHKDF { + + /** + * Return a singleton instance, based on the global configs. + * @return CryptHKDF + */ + protected static function singleton() { + return MediaWikiServices::getInstance()->getCryptHKDF(); + } + + /** + * RFC5869 defines HKDF in 2 steps, extraction and expansion. + * From http://eprint.iacr.org/2010/264.pdf: + * + * The scheme HKDF is specified as: + * HKDF(XTS, SKM, CTXinfo, L) = K(1) || K(2) || ... || K(t) + * where the values K(i) are defined as follows: + * PRK = HMAC(XTS, SKM) + * K(1) = HMAC(PRK, CTXinfo || 0); + * K(i+1) = HMAC(PRK, K(i) || CTXinfo || i), 1 <= i < t; + * where t = [L/k] and the value K(t) is truncated to its first d = L mod k bits; + * the counter i is non-wrapping and of a given fixed size, e.g., a single byte. + * Note that the length of the HMAC output is the same as its key length and therefore + * the scheme is well defined. + * + * XTS is the "extractor salt" + * SKM is the "secret keying material" + * + * N.B. 
http://eprint.iacr.org/2010/264.pdf seems to differ from RFC 5869 in that the test + * vectors from RFC 5869 only work if K(0) = '' and K(1) = HMAC(PRK, K(0) || CTXinfo || 1) + * + * @param string $hash The hashing function to use (e.g., sha256) + * @param string $ikm The input keying material + * @param string $salt The salt to add to the ikm, to get the prk + * @param string $info Optional context (change the output without affecting + * the randomness properties of the output) + * @param int $L Number of bytes to return + * @return string Cryptographically secure pseudorandom binary string + */ + public static function HKDF( $hash, $ikm, $salt, $info, $L ) { + return CryptHKDF::HKDF( $hash, $ikm, $salt, $info, $L ); + } + + /** + * Generate cryptographically random data and return it in raw binary form. + * + * @param int $bytes The number of bytes of random data to generate + * @param string $context String to mix into HMAC context + * @return string Binary string of length $bytes + */ + public static function generate( $bytes, $context ) { + return self::singleton()->generate( $bytes, $context ); + } + + /** + * Generate cryptographically random data and return it in hexadecimal string format. + * See MWCryptRand::realGenerateHex for details of the char-to-byte conversion logic. 
+ * + * @param int $chars The number of hex chars of random data to generate + * @param string $context String to mix into HMAC context + * @return string Random hex characters, $chars long + */ + public static function generateHex( $chars, $context = '' ) { + $bytes = ceil( $chars / 2 ); + $hex = bin2hex( self::singleton()->generate( $bytes, $context ) ); + return substr( $hex, 0, $chars ); + } + +} diff --git a/www/wiki/includes/utils/MWCryptRand.php b/www/wiki/includes/utils/MWCryptRand.php new file mode 100644 index 00000000..58189580 --- /dev/null +++ b/www/wiki/includes/utils/MWCryptRand.php @@ -0,0 +1,79 @@ +<?php +/** + * A cryptographic random generator class used for generating secret keys + * + * This is based in part on Drupal code as well as what we used in our own code + * prior to introduction of this class. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @author Daniel Friesen + * @file + */ + +use MediaWiki\MediaWikiServices; + +class MWCryptRand { + /** + * @return CryptRand + */ + protected static function singleton() { + return MediaWikiServices::getInstance()->getCryptRand(); + } + + /** + * Return a boolean indicating whether or not the source used for cryptographic + * random bytes generation in the previously run generate* call + * was cryptographically strong. + * + * @return bool Returns true if the source was strong, false if not. + */ + public static function wasStrong() { + return self::singleton()->wasStrong(); + } + + /** + * Generate a run of (ideally) cryptographically random data and return + * it in raw binary form. + * You can use MWCryptRand::wasStrong() if you wish to know if the source used + * was cryptographically strong. + * + * @param int $bytes The number of bytes of random data to generate + * @param bool $forceStrong Pass true if you want generate to prefer cryptographically + * strong sources of entropy even if reading from them may steal + * more entropy from the system than optimal. + * @return string Raw binary random data + */ + public static function generate( $bytes, $forceStrong = false ) { + return self::singleton()->generate( $bytes, $forceStrong ); + } + + /** + * Generate a run of (ideally) cryptographically random data and return + * it in hexadecimal string format. + * You can use MWCryptRand::wasStrong() if you wish to know if the source used + * was cryptographically strong. + * + * @param int $chars The number of hex chars of random data to generate + * @param bool $forceStrong Pass true if you want generate to prefer cryptographically + * strong sources of entropy even if reading from them may steal + * more entropy from the system than optimal. 
+ * @return string Hexadecimal random data + */ + public static function generateHex( $chars, $forceStrong = false ) { + return self::singleton()->generateHex( $chars, $forceStrong ); + } +} diff --git a/www/wiki/includes/utils/MWFileProps.php b/www/wiki/includes/utils/MWFileProps.php new file mode 100644 index 00000000..9d05c6ab --- /dev/null +++ b/www/wiki/includes/utils/MWFileProps.php @@ -0,0 +1,145 @@ +<?php +/** + * MimeMagic helper functions for detecting and dealing with MIME types. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * MimeMagic helper wrapper + * + * @since 1.28 + */ +class MWFileProps { + /** @var MimeAnalyzer */ + private $magic; + + /** + * @param MimeAnalyzer $magic + */ + public function __construct( MimeAnalyzer $magic ) { + $this->magic = $magic; + } + + /** + * Get an associative array containing information about + * a file with the given storage path. 
+ * + * Resulting array fields include: + * - fileExists + * - size (filesize in bytes) + * - mime (as major/minor) + * - media_type (value to be used with the MEDIATYPE_xxx constants) + * - metadata (handler specific) + * - sha1 (in base 36) + * - width + * - height + * - bits (bitrate) + * - file-mime + * - major_mime + * - minor_mime + * + * @param string $path Filesystem path to a file + * @param string|bool $ext The file extension, or true to extract it from the filename. + * Set it to false to ignore the extension. + * @return array + * @since 1.28 + */ + public function getPropsFromPath( $path, $ext ) { + $fsFile = new FSFile( $path ); + + $info = $this->newPlaceholderProps(); + $info['fileExists'] = $fsFile->exists(); + if ( $info['fileExists'] ) { + $info['size'] = $fsFile->getSize(); // bytes + $info['sha1'] = $fsFile->getSha1Base36(); + + # MIME type according to file contents + $info['file-mime'] = $this->magic->guessMimeType( $path, false ); + # Logical MIME type + $ext = ( $ext === true ) ? FileBackend::extensionFromPath( $path ) : $ext; + $info['mime'] = $this->magic->improveTypeFromExtension( $info['file-mime'], $ext ); + + list( $info['major_mime'], $info['minor_mime'] ) = File::splitMime( $info['mime'] ); + $info['media_type'] = $this->magic->getMediaType( $path, $info['mime'] ); + + # Height, width and metadata + $handler = MediaHandler::getHandler( $info['mime'] ); + if ( $handler ) { + $info['metadata'] = $handler->getMetadata( $fsFile, $path ); + /** @noinspection PhpMethodParametersCountMismatchInspection */ + $gis = $handler->getImageSize( $fsFile, $path, $info['metadata'] ); + if ( is_array( $gis ) ) { + $info = $this->extractImageSizeInfo( $gis ) + $info; + } + } + } + + return $info; + } + + /** + * Extract image size information + * + * @param array $gis + * @return array + */ + private function extractImageSizeInfo( array $gis ) { + $info = []; + # NOTE: $gis[2] contains a code for the image type. This is no longer used. 
+ $info['width'] = $gis[0]; + $info['height'] = $gis[1]; + if ( isset( $gis['bits'] ) ) { + $info['bits'] = $gis['bits']; + } else { + $info['bits'] = 0; + } + + return $info; + } + + /** + * Empty place holder props for non-existing files + * + * Resulting array fields include: + * - fileExists + * - size (filesize in bytes) + * - mime (as major/minor) + * - media_type (value to be used with the MEDIATYPE_xxx constants) + * - metadata (handler specific) + * - sha1 (in base 36) + * - width + * - height + * - bits (bitrate) + * - file-mime + * - major_mime + * - minor_mime + * + * @return array + * @since 1.28 + */ + public function newPlaceholderProps() { + return FSFile::placeholderProps() + [ + 'metadata' => '', + 'width' => 0, + 'height' => 0, + 'bits' => 0, + 'media_type' => MEDIATYPE_UNKNOWN + ]; + } +} diff --git a/www/wiki/includes/utils/MWRestrictions.php b/www/wiki/includes/utils/MWRestrictions.php new file mode 100644 index 00000000..caf88a15 --- /dev/null +++ b/www/wiki/includes/utils/MWRestrictions.php @@ -0,0 +1,147 @@ +<?php +/** + * A class to check request restrictions expressed as a JSON object + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + */ + +/** + * A class to check request restrictions expressed as a JSON object + */ +class MWRestrictions { + + private $ipAddresses = [ '0.0.0.0/0', '::/0' ]; + + /** + * @param array $restrictions + * @throws InvalidArgumentException + */ + protected function __construct( array $restrictions = null ) { + if ( $restrictions !== null ) { + $this->loadFromArray( $restrictions ); + } + } + + /** + * @return MWRestrictions + */ + public static function newDefault() { + return new self(); + } + + /** + * @param array $restrictions + * @return MWRestrictions + * @throws InvalidArgumentException + */ + public static function newFromArray( array $restrictions ) { + return new self( $restrictions ); + } + + /** + * @param string $json JSON representation of the restrictions + * @return MWRestrictions + * @throws InvalidArgumentException + */ + public static function newFromJson( $json ) { + $restrictions = FormatJson::decode( $json, true ); + if ( !is_array( $restrictions ) ) { + throw new InvalidArgumentException( 'Invalid restrictions JSON' ); + } + return new self( $restrictions ); + } + + private function loadFromArray( array $restrictions ) { + static $validKeys = [ 'IPAddresses' ]; + static $neededKeys = [ 'IPAddresses' ]; + + $keys = array_keys( $restrictions ); + $invalidKeys = array_diff( $keys, $validKeys ); + if ( $invalidKeys ) { + throw new InvalidArgumentException( + 'Array contains invalid keys: ' . implode( ', ', $invalidKeys ) + ); + } + $missingKeys = array_diff( $neededKeys, $keys ); + if ( $missingKeys ) { + throw new InvalidArgumentException( + 'Array is missing required keys: ' . 
implode( ', ', $missingKeys ) + ); + } + + if ( !is_array( $restrictions['IPAddresses'] ) ) { + throw new InvalidArgumentException( 'IPAddresses is not an array' ); + } + foreach ( $restrictions['IPAddresses'] as $ip ) { + if ( !\IP::isIPAddress( $ip ) ) { + throw new InvalidArgumentException( "Invalid IP address: $ip" ); + } + } + $this->ipAddresses = $restrictions['IPAddresses']; + } + + /** + * Return the restrictions as an array + * @return array + */ + public function toArray() { + return [ + 'IPAddresses' => $this->ipAddresses, + ]; + } + + /** + * Return the restrictions as a JSON string + * @param bool|string $pretty Pretty-print the JSON output, see FormatJson::encode + * @return string + */ + public function toJson( $pretty = false ) { + return FormatJson::encode( $this->toArray(), $pretty, FormatJson::ALL_OK ); + } + + public function __toString() { + return $this->toJson(); + } + + /** + * Test against the passed WebRequest + * @param WebRequest $request + * @return Status + */ + public function check( WebRequest $request ) { + $ok = [ + 'ip' => $this->checkIP( $request->getIP() ), + ]; + $status = Status::newGood(); + $status->setResult( $ok === array_filter( $ok ), $ok ); + return $status; + } + + /** + * Test an IP address + * @param string $ip + * @return bool + */ + public function checkIP( $ip ) { + foreach ( $this->ipAddresses as $range ) { + if ( \IP::isInRange( $ip, $range ) ) { + return true; + } + } + + return false; + } +} diff --git a/www/wiki/includes/utils/README b/www/wiki/includes/utils/README new file mode 100644 index 00000000..b5b8ec88 --- /dev/null +++ b/www/wiki/includes/utils/README @@ -0,0 +1,9 @@ +The classes in this directory are general utilities for use by any part of +MediaWiki. They do not favour any particular user interface and are not +constrained to serve any particular feature. 
This is similar to includes/libs, +except that some dependency on the MediaWiki framework (such as the use of +MWException, Status or wfDebug()) disqualifies them from use outside of +MediaWiki without modification. + +Utilities should not use global configuration variables, rather they should rely +on the caller to configure their behaviour. diff --git a/www/wiki/includes/utils/RowUpdateGenerator.php b/www/wiki/includes/utils/RowUpdateGenerator.php new file mode 100644 index 00000000..342dffd6 --- /dev/null +++ b/www/wiki/includes/utils/RowUpdateGenerator.php @@ -0,0 +1,39 @@ +<?php +/** + * Interface for generating updates to single rows in the database. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ +interface RowUpdateGenerator { + /** + * Given a database row, generates an array mapping column names to + * updated value within the database row. + * + * Sample Response: + * return [ + * 'some_col' => 'new value', + * 'other_col' => 99, + * ]; + * + * @param stdClass $row A row from the database + * @return array Map of column names to updated value within the + * database row. When no update is required returns an empty array. 
+ */ + public function update( $row ); +} diff --git a/www/wiki/includes/utils/UIDGenerator.php b/www/wiki/includes/utils/UIDGenerator.php new file mode 100644 index 00000000..4d5c3af8 --- /dev/null +++ b/www/wiki/includes/utils/UIDGenerator.php @@ -0,0 +1,629 @@ +<?php +/** + * This file deals with UID generation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +use Wikimedia\Assert\Assert; +use MediaWiki\MediaWikiServices; + +/** + * Class for getting statistically unique IDs + * + * @since 1.21 + */ +class UIDGenerator { + /** @var UIDGenerator */ + protected static $instance = null; + + protected $nodeIdFile; // string; local file path + protected $nodeId32; // string; node ID in binary (32 bits) + protected $nodeId48; // string; node ID in binary (48 bits) + + protected $lockFile88; // string; local file path + protected $lockFile128; // string; local file path + protected $lockFileUUID; // string; local file path + + /** @var array */ + protected $fileHandles = []; // cache file handles + + const QUICK_RAND = 1; // get randomness from fast and insecure sources + const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available + + protected function __construct() { + $this->nodeIdFile = wfTempDir() . '/mw-' . 
__CLASS__ . '-UID-nodeid'; + $nodeId = ''; + if ( is_file( $this->nodeIdFile ) ) { + $nodeId = file_get_contents( $this->nodeIdFile ); + } + // Try to get some ID that uniquely identifies this machine (RFC 4122)... + if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) { + Wikimedia\suppressWarnings(); + if ( wfIsWindows() ) { + // https://technet.microsoft.com/en-us/library/bb490913.aspx + $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) ); + $line = substr( $csv, 0, strcspn( $csv, "\n" ) ); + $info = str_getcsv( $line ); + $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : ''; + } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X + // See https://linux.die.net/man/8/ifconfig + $m = []; + preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/', + wfShellExec( '/sbin/ifconfig -a' ), $m ); + $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : ''; + } + Wikimedia\restoreWarnings(); + if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) { + $nodeId = MWCryptRand::generateHex( 12, true ); + $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit + } + file_put_contents( $this->nodeIdFile, $nodeId ); // cache + } + $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 ); + $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 ); + // If different processes run as different users, they may have different temp dirs. + // This is dealt with by initializing the clock sequence number and counters randomly. + $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88'; + $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128'; + $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . 
'-UUID-128'; + } + + /** + * @todo: move to MW-specific factory class and inject temp dir + * @return UIDGenerator + */ + protected static function singleton() { + if ( self::$instance === null ) { + self::$instance = new self(); + } + + return self::$instance; + } + + /** + * Get a statistically unique 88-bit unsigned integer ID string. + * The bits of the UID are prefixed with the time (down to the millisecond). + * + * These IDs are suitable as values for the shard key of distributed data. + * If a column uses these as values, it should be declared UNIQUE to handle collisions. + * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. + * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL. + * + * UID generation is serialized on each server (as the node ID is for the whole machine). + * + * @param int $base Specifies a base other than 10 + * @return string Number + * @throws RuntimeException + */ + public static function newTimestampedUID88( $base = 10 ) { + Assert::parameterType( 'integer', $base, '$base' ); + Assert::parameter( $base <= 36, '$base', 'must be <= 36' ); + Assert::parameter( $base >= 2, '$base', 'must be >= 2' ); + + $gen = self::singleton(); + $info = $gen->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 ); + $info['offsetCounter'] = $info['offsetCounter'] % 1024; + return Wikimedia\base_convert( $gen->getTimestampedID88( $info ), 2, $base ); + } + + /** + * @param array $info The result of UIDGenerator::getTimeAndDelay() or + * a plain (UIDGenerator::millitime(), counter, clock sequence) array. 
+ * @return string 88 bits + * @throws RuntimeException + */ + protected function getTimestampedID88( array $info ) { + if ( isset( $info['time'] ) ) { + $time = $info['time']; + $counter = $info['offsetCounter']; + } else { + $time = $info[0]; + $counter = $info[1]; + } + // Take the 46 LSBs of "milliseconds since epoch" + $id_bin = $this->millisecondsSinceEpochBinary( $time ); + // Add a 10 bit counter resulting in 56 bits total + $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ); + // Add the 32 bit node ID resulting in 88 bits total + $id_bin .= $this->nodeId32; + // Convert to a 1-27 digit integer string + if ( strlen( $id_bin ) !== 88 ) { + throw new RuntimeException( "Detected overflow for millisecond timestamp." ); + } + + return $id_bin; + } + + /** + * Get a statistically unique 128-bit unsigned integer ID string. + * The bits of the UID are prefixed with the time (down to the millisecond). + * + * These IDs are suitable as globally unique IDs, without any enforced uniqueness. + * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. + * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL. + * + * UID generation is serialized on each server (as the node ID is for the whole machine). 
+ * + * @param int $base Specifies a base other than 10 + * @return string Number + * @throws RuntimeException + */ + public static function newTimestampedUID128( $base = 10 ) { + Assert::parameterType( 'integer', $base, '$base' ); + Assert::parameter( $base <= 36, '$base', 'must be <= 36' ); + Assert::parameter( $base >= 2, '$base', 'must be >= 2' ); + + $gen = self::singleton(); + $info = $gen->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 ); + $info['offsetCounter'] = $info['offsetCounter'] % 1048576; + + return Wikimedia\base_convert( $gen->getTimestampedID128( $info ), 2, $base ); + } + + /** + * @param array $info The result of UIDGenerator::getTimeAndDelay() or + * a plain (UIDGenerator::millitime(), counter, clock sequence) array. + * @return string 128 bits + * @throws RuntimeException + */ + protected function getTimestampedID128( array $info ) { + if ( isset( $info['time'] ) ) { + $time = $info['time']; + $counter = $info['offsetCounter']; + $clkSeq = $info['clkSeq']; + } else { + $time = $info[0]; + $counter = $info[1]; + $clkSeq = $info[2]; + } + // Take the 46 LSBs of "milliseconds since epoch" + $id_bin = $this->millisecondsSinceEpochBinary( $time ); + // Add a 20 bit counter resulting in 66 bits total + $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ); + // Add a 14 bit clock sequence number resulting in 80 bits total + $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ); + // Add the 48 bit node ID resulting in 128 bits total + $id_bin .= $this->nodeId48; + // Convert to a 1-39 digit integer string + if ( strlen( $id_bin ) !== 128 ) { + throw new RuntimeException( "Detected overflow for millisecond timestamp." ); + } + + return $id_bin; + } + + /** + * Return an RFC4122 compliant v1 UUID + * + * @return string + * @throws RuntimeException + * @since 1.27 + */ + public static function newUUIDv1() { + $gen = self::singleton(); + // There can be up to 10000 intervals for the same millisecond timestamp. 
+ // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter. + // Add this onto the timestamp to allow making up to 5000 IDs per second. + return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) ); + } + + /** + * Return an RFC4122 compliant v1 UUID + * + * @return string 32 hex characters with no hyphens + * @throws RuntimeException + * @since 1.27 + */ + public static function newRawUUIDv1() { + return str_replace( '-', '', self::newUUIDv1() ); + } + + /** + * @param array $info Result of UIDGenerator::getTimeAndDelay() + * @return string 128 bits + */ + protected function getUUIDv1( array $info ) { + $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 ); + $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] ); + // Take the 32 bits of "time low" + $id_bin = substr( $time_bin, 28, 32 ); + // Add 16 bits of "time mid" resulting in 48 bits total + $id_bin .= substr( $time_bin, 12, 16 ); + // Add 4 bit version resulting in 52 bits total + $id_bin .= '0001'; + // Add 12 bits of "time high" resulting in 64 bits total + $id_bin .= substr( $time_bin, 0, 12 ); + // Add 2 bits of "variant" resulting in 66 bits total + $id_bin .= '10'; + // Add 6 bits of "clock seq high" resulting in 72 bits total + $id_bin .= substr( $clkSeq_bin, 0, 6 ); + // Add 8 bits of "clock seq low" resulting in 80 bits total + $id_bin .= substr( $clkSeq_bin, 6, 8 ); + // Add the 48 bit node ID resulting in 128 bits total + $id_bin .= $this->nodeId48; + // Convert to a 32 char hex string with dashes + if ( strlen( $id_bin ) !== 128 ) { + throw new RuntimeException( "Detected overflow for millisecond timestamp." 
); + } + $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 ); + return sprintf( '%s-%s-%s-%s-%s', + // "time_low" (32 bits) + substr( $hex, 0, 8 ), + // "time_mid" (16 bits) + substr( $hex, 8, 4 ), + // "time_hi_and_version" (16 bits) + substr( $hex, 12, 4 ), + // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits) + substr( $hex, 16, 4 ), + // "node" (48 bits) + substr( $hex, 20, 12 ) + ); + } + + /** + * Return an RFC4122 compliant v4 UUID + * + * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND) + * @return string + * @throws RuntimeException + */ + public static function newUUIDv4( $flags = 0 ) { + $hex = ( $flags & self::QUICK_RAND ) + ? wfRandomString( 31 ) + : MWCryptRand::generateHex( 31 ); + + return sprintf( '%s-%s-%s-%s-%s', + // "time_low" (32 bits) + substr( $hex, 0, 8 ), + // "time_mid" (16 bits) + substr( $hex, 8, 4 ), + // "time_hi_and_version" (16 bits) + '4' . substr( $hex, 12, 3 ), + // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits) + dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ), + // "node" (48 bits) + substr( $hex, 19, 12 ) + ); + } + + /** + * Return an RFC4122 compliant v4 UUID + * + * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND) + * @return string 32 hex characters with no hyphens + * @throws RuntimeException + */ + public static function newRawUUIDv4( $flags = 0 ) { + return str_replace( '-', '', self::newUUIDv4( $flags ) ); + } + + /** + * Return an ID that is sequential *only* for this node and bucket + * + * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols. + * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart. 
+ * + * @param string $bucket Arbitrary bucket name (should be ASCII) + * @param int $bits Bit size (<=48) of resulting numbers before wrap-around + * @param int $flags (supports UIDGenerator::QUICK_VOLATILE) + * @return float Integer value as float + * @since 1.23 + */ + public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) { + return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) ); + } + + /** + * Return IDs that are sequential *only* for this node and bucket + * + * @see UIDGenerator::newSequentialPerNodeID() + * @param string $bucket Arbitrary bucket name (should be ASCII) + * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around + * @param int $count Number of IDs to return + * @param int $flags (supports UIDGenerator::QUICK_VOLATILE) + * @return array Ordered list of float integer values + * @since 1.23 + */ + public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) { + $gen = self::singleton(); + return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ); + } + + /** + * Return IDs that are sequential *only* for this node and bucket + * + * @see UIDGenerator::newSequentialPerNodeID() + * @param string $bucket Arbitrary bucket name (should be ASCII) + * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around + * @param int $count Number of IDs to return + * @param int $flags (supports UIDGenerator::QUICK_VOLATILE) + * @return array Ordered list of float integer values + * @throws RuntimeException + */ + protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) { + if ( $count <= 0 ) { + return []; // nothing to do + } elseif ( $bits < 16 || $bits > 48 ) { + throw new RuntimeException( "Requested bit size ($bits) is out of range." 
); + } + + $counter = null; // post-increment persistent counter value + + // Use APC/etc if requested, available, and not in CLI mode; + // Counter values would not survive across script instances in CLI mode. + $cache = null; + if ( ( $flags & self::QUICK_VOLATILE ) && !wfIsCLI() ) { + $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); + } + if ( $cache ) { + $counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count ); + if ( $counter === false ) { + throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) ); + } + } + + // Note: use of fmod() avoids "division by zero" on 32 bit machines + if ( $counter === null ) { + $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48'; + // Get the UID lock file handle + if ( isset( $this->fileHandles[$path] ) ) { + $handle = $this->fileHandles[$path]; + } else { + $handle = fopen( $path, 'cb+' ); + $this->fileHandles[$path] = $handle ?: null; // cache + } + // Acquire the UID lock file + if ( $handle === false ) { + throw new RuntimeException( "Could not open '{$path}'." ); + } elseif ( !flock( $handle, LOCK_EX ) ) { + // Drop the cached entry before closing: a closed handle must not be + // reused by a later call nor closed a second time by __destruct(). + unset( $this->fileHandles[$path] ); + fclose( $handle ); + throw new RuntimeException( "Could not acquire '{$path}'." ); + } + // Fetch the counter value and increment it... 
+ rewind( $handle ); + $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float + // Write back the new counter value + ftruncate( $handle, 0 ); + rewind( $handle ); + fwrite( $handle, fmod( $counter, pow( 2, 48 ) ) ); // wrap-around as needed + fflush( $handle ); + // Release the UID lock file + flock( $handle, LOCK_UN ); + } + + $ids = []; + $divisor = pow( 2, $bits ); + $currentId = floor( $counter - $count ); // pre-increment counter value + for ( $i = 0; $i < $count; ++$i ) { + $ids[] = fmod( ++$currentId, $divisor ); + } + + return $ids; + } + + /** + * Get a (time,counter,clock sequence) where (time,counter) is higher + * than any previous (time,counter) value for the given clock sequence. + * This is useful for making UIDs sequential on a per-node basis. + * + * @param string $lockFile Name of the property holding the local lock file path + * (e.g. 'lockFile88') + * @param int $clockSeqSize The number of possible clock sequence values + * @param int $counterSize The number of possible counter values + * @param int $offsetSize The number of possible offset values + * @return array Map with keys 'time' (result of UIDGenerator::millitime()), + * 'counter', 'clkSeq', 'offset' and 'offsetCounter' + * @throws RuntimeException + */ + protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) { + // Get the UID lock file handle. The cache is keyed by the real file path + // (not the property name) so that deleteCacheFiles() can unlink the file. + if ( isset( $this->fileHandles[$this->$lockFile] ) ) { + $handle = $this->fileHandles[$this->$lockFile]; + } else { + $handle = fopen( $this->$lockFile, 'cb+' ); + $this->fileHandles[$this->$lockFile] = $handle ?: null; // cache + } + // Acquire the UID lock file + if ( $handle === false ) { + throw new RuntimeException( "Could not open '{$this->$lockFile}'." ); + } elseif ( !flock( $handle, LOCK_EX ) ) { + // Drop the cached entry before closing: a closed handle must not be + // reused by a later call nor closed a second time by __destruct(). + unset( $this->fileHandles[$this->$lockFile] ); + fclose( $handle ); + throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." 
); + } + // Get the current timestamp, clock sequence number, last time, and counter + rewind( $handle ); + $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>" + $clockChanged = false; // clock set back significantly? + if ( count( $data ) == 5 ) { // last UID info already initialized + $clkSeq = (int)$data[0] % $clockSeqSize; + $prevTime = [ (int)$data[1], (int)$data[2] ]; + $offset = (int)$data[4] % $counterSize; // random counter offset + $counter = 0; // counter for UIDs with the same timestamp + // Delay until the clock reaches the time of the last ID. + // This detects any microtime() drift among processes. + $time = $this->timeWaitUntil( $prevTime ); + if ( !$time ) { // too long to delay? + $clockChanged = true; // bump clock sequence number + $time = self::millitime(); + } elseif ( $time == $prevTime ) { + // Bump the counter if there are timestamp collisions + $counter = (int)$data[3] % $counterSize; + if ( ++$counter >= $counterSize ) { // sanity (starts at 0) + flock( $handle, LOCK_UN ); // abort + throw new RuntimeException( "Counter overflow for timestamp value." ); + } + } + } else { // last UID info not initialized + $clkSeq = mt_rand( 0, $clockSeqSize - 1 ); + $counter = 0; + $offset = mt_rand( 0, $offsetSize - 1 ); + $time = self::millitime(); + } + // microtime() and gettimeofday() can drift from time() at least on Windows. + // The drift is immediate for processes running while the system clock changes. + // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659. + if ( abs( time() - $time[0] ) >= 2 ) { + // We don't want processes using too high or low timestamps to avoid duplicate + // UIDs and clock sequence number churn. This process should just be restarted. + flock( $handle, LOCK_UN ); // abort + throw new RuntimeException( "Process clock is outdated or drifted." 
); + } + // If microtime() is synced and a clock change was detected, then the clock went back + if ( $clockChanged ) { + // Bump the clock sequence number and also randomize the counter offset, + // which is useful for UIDs that do not include the clock sequence number. + $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize; + $offset = mt_rand( 0, $offsetSize - 1 ); + trigger_error( "Clock was set back; sequence number incremented." ); + } + // Update the (clock sequence number, timestamp, counter) + ftruncate( $handle, 0 ); + rewind( $handle ); + fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" ); + fflush( $handle ); + // Release the UID lock file + flock( $handle, LOCK_UN ); + + return [ + 'time' => $time, + 'counter' => $counter, + 'clkSeq' => $clkSeq, + 'offset' => $offset, + 'offsetCounter' => $counter + $offset + ]; + } + + /** + * Wait till the current timestamp reaches $time and return the current + * timestamp. This returns false if it would have to wait more than 10ms. + * + * @param array $time Result of UIDGenerator::millitime() + * @return array|bool UIDGenerator::millitime() result or false + */ + protected function timeWaitUntil( array $time ) { + do { + $ct = self::millitime(); + if ( $ct >= $time ) { // https://secure.php.net/manual/en/language.operators.comparison.php + return $ct; // current timestamp is higher than $time + } + } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 ); + + return false; + } + + /** + * @param array $time Result of UIDGenerator::millitime() + * @return string 46 LSBs of "milliseconds since epoch" in binary (rolls over in 4201) + * @throws RuntimeException + */ + protected function millisecondsSinceEpochBinary( array $time ) { + list( $sec, $msec ) = $time; + $ts = 1000 * $sec + $msec; + if ( $ts > pow( 2, 52 ) ) { + throw new RuntimeException( __METHOD__ . 
+ ': sorry, this function doesn\'t work after the year 144680' ); + } + + return substr( Wikimedia\base_convert( $ts, 10, 2, 46 ), -46 ); + } + + /** + * Convert a millitime() pair plus a delta into 60 binary digits of + * 100ns intervals, using native integers, gmp or bcmath as available. + * + * @param array $time Result of UIDGenerator::millitime() + * @param int $delta Number of intervals to add on to the timestamp + * @return string 60 bits of "100ns intervals since 15 October 1582" (rolls over in 3400) + * @throws RuntimeException + */ + protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) { + list( $sec, $msec ) = $time; + // Number of 100ns intervals between 15 October 1582 and the Unix epoch + $offset = '122192928000000000'; + if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers + $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta; + $id_bin = str_pad( decbin( $ts % pow( 2, 60 ) ), 60, '0', STR_PAD_LEFT ); + } elseif ( extension_loaded( 'gmp' ) ) { + $ts = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec ); // ms + $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals + $ts = gmp_add( $ts, (string)$delta ); + $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around + $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT ); + } elseif ( extension_loaded( 'bcmath' ) ) { + $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms + $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals + $ts = bcadd( $ts, $delta ); + $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around + $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 ); + } else { + throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' ); + } + return $id_bin; + } + + /** + * @return array (current time in seconds, milliseconds since then) + */ + protected static function millitime() { + // microtime() returns "<fraction of a second> <unix seconds>" + list( $msec, $sec ) = explode( ' ', microtime() ); + + return [ (int)$sec, (int)( $msec * 1000 ) ]; + } + + /** + * Delete all cache files that have been created. + * + * This is a cleanup method primarily meant to be used from unit tests to + * avoid polluting the local filesystem. 
If used outside of a unit test + * environment it should be used with caution as it may destroy state saved + * in the files. + * + * @see unitTestTearDown + * @since 1.23 + */ + protected function deleteCacheFiles() { + // Bug: 44850 + foreach ( $this->fileHandles as $path => $handle ) { + // A null entry is the placeholder cached when fopen() failed + if ( $handle !== null ) { + fclose( $handle ); + } + if ( is_file( $path ) ) { + unlink( $path ); + } + unset( $this->fileHandles[$path] ); + } + if ( is_file( $this->nodeIdFile ) ) { + unlink( $this->nodeIdFile ); + } + } + + /** + * Cleanup resources when tearing down after a unit test. + * + * This is a cleanup method primarily meant to be used from unit tests to + * avoid polluting the local filesystem. If used outside of a unit test + * environment it should be used with caution as it may destroy state saved + * in the files. + * + * @see deleteCacheFiles + * @since 1.23 + */ + public static function unitTestTearDown() { + // Bug: 44850 + $gen = self::singleton(); + $gen->deleteCacheFiles(); + } + + function __destruct() { + // array_filter() drops the null placeholders so only open handles are closed + array_map( 'fclose', array_filter( $this->fileHandles ) ); + } +} diff --git a/www/wiki/includes/utils/ZipDirectoryReader.php b/www/wiki/includes/utils/ZipDirectoryReader.php new file mode 100644 index 00000000..f0ace2cc --- /dev/null +++ b/www/wiki/includes/utils/ZipDirectoryReader.php @@ -0,0 +1,717 @@ +<?php +/** + * ZIP file directories reader, for the purposes of upload verification. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * A class for reading ZIP file directories, for the purposes of upload + * verification. + * + * Only a functional interface is provided: ZipDirectoryReader::read(). No access is + * given to object instances. + */ +class ZipDirectoryReader { + /** + * Read a ZIP file and call a function for each file discovered in it. + * + * Because this class is aimed at verification, an error is raised on + * suspicious or ambiguous input, instead of emulating some standard + * behavior. + * + * @param string $fileName The archive file name + * @param callable $callback The callback function. It will be called for each file + * with a single associative array each time, with members: + * + * - name: The file name. Directories conventionally have a trailing + * slash. + * + * - mtime: The file modification time, in MediaWiki 14-char format + * + * - size: The uncompressed file size + * + * @param array $options An associative array of read options, with the option + * name in the key. This may currently contain: + * + * - zip64: If this is set to true, then we will emulate a + * library with ZIP64 support, like OpenJDK 7. If it is set to + * false, then we will emulate a library with no knowledge of + * ZIP64. + * + * NOTE: The ZIP64 code is untested and probably doesn't work. It + * turned out to be easier to just reject ZIP64 archive uploads, + * since they are likely to be very rare. Confirming safety of a + * ZIP64 file is fairly complex. What do you do with a file that is + * ambiguous and broken when read with a non-ZIP64 reader, but valid + * when read with a ZIP64 reader? 
This situation is normal for a + * valid ZIP64 file, and working out what non-ZIP64 readers will make + * of such a file is not trivial. + * + * @return Status A Status object. The following fatal errors are defined: + * + * - zip-file-open-error: The file could not be opened. + * + * - zip-wrong-format: The file does not appear to be a ZIP file. + * + * - zip-bad: There was something wrong or ambiguous about the file + * data. + * + * - zip-unsupported: The ZIP file uses features which + * ZipDirectoryReader does not support. + * + * The default messages for those fatal errors are written in a way that + * makes sense for upload verification. + * + * If a fatal error is returned, more information about the error will be + * available in the debug log. + * + * Note that the callback function may be called any number of times before + * a fatal error is returned. If this occurs, the data sent to the callback + * function should be discarded. + */ + public static function read( $fileName, $callback, $options = [] ) { + $zdr = new self( $fileName, $callback, $options ); + + return $zdr->execute(); + } + + /** The file name */ + protected $fileName; + + /** The opened file resource */ + protected $file; + + /** The cached length of the file, or null if it has not been loaded yet. 
*/ + protected $fileLength; + + /** A segmented cache of the file contents */ + protected $buffer; + + /** The file data callback */ + protected $callback; + + /** The ZIP64 mode */ + protected $zip64 = false; + + /** Stored headers */ + protected $eocdr, $eocdr64, $eocdr64Locator; + + protected $data; + + /** The "extra field" ID for ZIP64 central directory entries */ + const ZIP64_EXTRA_HEADER = 0x0001; + + /** The segment size for the file contents cache */ + const SEGSIZE = 16384; + + /** The index of the "general field" bit for UTF-8 file names */ + const GENERAL_UTF8 = 11; + + /** The index of the "general field" bit for central directory encryption */ + const GENERAL_CD_ENCRYPTED = 13; + + /** + * Private constructor + * @param string $fileName + * @param callable $callback + * @param array $options + */ + protected function __construct( $fileName, $callback, $options ) { + $this->fileName = $fileName; + $this->callback = $callback; + + if ( isset( $options['zip64'] ) ) { + $this->zip64 = $options['zip64']; + } + } + + /** + * Read the directory according to settings in $this. + * + * @return Status + */ + function execute() { + $this->file = fopen( $this->fileName, 'r' ); + $this->data = []; + if ( !$this->file ) { + return Status::newFatal( 'zip-file-open-error' ); + } + + $status = Status::newGood(); + try { + $this->readEndOfCentralDirectoryRecord(); + if ( $this->zip64 ) { + list( $offset, $size ) = $this->findZip64CentralDirectory(); + $this->readCentralDirectory( $offset, $size ); + } else { + if ( $this->eocdr['CD size'] == 0xffffffff + || $this->eocdr['CD offset'] == 0xffffffff + || $this->eocdr['CD entries total'] == 0xffff + ) { + $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' . + 'but we are in legacy mode. Rejecting this upload is necessary to avoid ' . + 'opening vulnerabilities on clients using OpenJDK 7 or later.' 
); + } + + list( $offset, $size ) = $this->findOldCentralDirectory(); + $this->readCentralDirectory( $offset, $size ); + } + } catch ( ZipDirectoryReaderError $e ) { + $status->fatal( $e->getErrorCode() ); + } + + fclose( $this->file ); + + return $status; + } + + /** + * Throw an error, and log a debug message + * @param mixed $code + * @param string $debugMessage + * @throws ZipDirectoryReaderError + */ + function error( $code, $debugMessage ) { + wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" ); + throw new ZipDirectoryReaderError( $code ); + } + + /** + * Read the header which is at the end of the central directory, + * unimaginatively called the "end of central directory record" by the ZIP + * spec. + */ + function readEndOfCentralDirectoryRecord() { + $info = [ + 'signature' => 4, + 'disk' => 2, + 'CD start disk' => 2, + 'CD entries this disk' => 2, + 'CD entries total' => 2, + 'CD size' => 4, + 'CD offset' => 4, + 'file comment length' => 2, + ]; + $structSize = $this->getStructSize( $info ); + $startPos = $this->getFileLength() - 65536 - $structSize; + if ( $startPos < 0 ) { + $startPos = 0; + } + + if ( $this->getFileLength() === 0 ) { + $this->error( 'zip-wrong-format', "The file is empty." ); + } + + $block = $this->getBlock( $startPos ); + $sigPos = strrpos( $block, "PK\x05\x06" ); + if ( $sigPos === false ) { + $this->error( 'zip-wrong-format', + "zip file lacks EOCDR signature. It probably isn't a zip file." 
); + } + + $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info ); + $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length']; + + if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) { + $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' ); + } + if ( $this->eocdr['disk'] !== 0 + || $this->eocdr['CD start disk'] !== 0 + ) { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' ); + } + $this->eocdr += $this->unpack( + $block, + [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ], + $sigPos + $structSize ); + $this->eocdr['position'] = $startPos + $sigPos; + } + + /** + * Read the header called the "ZIP64 end of central directory locator". An + * error will be raised if it does not exist. + */ + function readZip64EndOfCentralDirectoryLocator() { + $info = [ + 'signature' => [ 'string', 4 ], + 'eocdr64 start disk' => 4, + 'eocdr64 offset' => 8, + 'number of disks' => 4, + ]; + $structSize = $this->getStructSize( $info ); + + $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize; + $block = $this->getBlock( $start, $structSize ); + $this->eocdr64Locator = $data = $this->unpack( $block, $info ); + + if ( $data['signature'] !== "PK\x06\x07" ) { + // Note: Java will allow this and continue to read the + // EOCDR64, so we have to reject the upload, we can't + // just use the EOCDR header instead. + $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' ); + } + } + + /** + * Read the header called the "ZIP64 end of central directory record". It + * may replace the regular "end of central directory record" in ZIP64 files. 
+ */ + function readZip64EndOfCentralDirectoryRecord() { + if ( $this->eocdr64Locator['eocdr64 start disk'] != 0 + || $this->eocdr64Locator['number of disks'] != 0 + ) { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' ); + } + + $info = [ + 'signature' => [ 'string', 4 ], + 'EOCDR64 size' => 8, + 'version made by' => 2, + 'version needed' => 2, + 'disk' => 4, + 'CD start disk' => 4, + 'CD entries this disk' => 8, + 'CD entries total' => 8, + 'CD size' => 8, + 'CD offset' => 8 + ]; + $structSize = $this->getStructSize( $info ); + $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize ); + $this->eocdr64 = $data = $this->unpack( $block, $info ); + if ( $data['signature'] !== "PK\x06\x06" ) { + $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' ); + } + if ( $data['disk'] !== 0 + || $data['CD start disk'] !== 0 + ) { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' ); + } + } + + /** + * Find the location of the central directory, as would be seen by a + * non-ZIP64 reader. + * + * @return array List containing offset, size and end position. + */ + function findOldCentralDirectory() { + $size = $this->eocdr['CD size']; + $offset = $this->eocdr['CD offset']; + $endPos = $this->eocdr['position']; + + // Some readers use the EOCDR position instead of the offset field + // to find the directory, so to be safe, we check if they both agree. + if ( $offset + $size != $endPos ) { + $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' . + 'of central directory record' ); + } + + return [ $offset, $size ]; + } + + /** + * Find the location of the central directory, as would be seen by a + * ZIP64-compliant reader. + * + * @return array List containing offset, size and end position. 
*/
	function findZip64CentralDirectory() {
		// The spec is ambiguous about the exact rules of precedence between the
		// ZIP64 headers and the original headers. Here we follow zip_util.c
		// from OpenJDK 7.
		$size = $this->eocdr['CD size'];
		$offset = $this->eocdr['CD offset'];
		$numEntries = $this->eocdr['CD entries total'];
		$endPos = $this->eocdr['position'];

		// A saturated field in the classic end-of-central-directory record is
		// the trigger for looking at the ZIP64 structures.
		if ( $size == 0xffffffff
			|| $offset == 0xffffffff
			|| $numEntries == 0xffff
		) {
			$this->readZip64EndOfCentralDirectoryLocator();

			if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
				$this->readZip64EndOfCentralDirectoryRecord();
				if ( isset( $this->eocdr64['CD offset'] ) ) {
					// The ZIP64 record takes over; the directory must then end
					// where the ZIP64 record begins.
					$size = $this->eocdr64['CD size'];
					$offset = $this->eocdr64['CD offset'];
					$endPos = $this->eocdr64Locator['eocdr64 offset'];
				}
			}
		}

		// Some readers use the EOCDR position instead of the offset field
		// to find the directory, so to be safe, we check if they both agree.
		if ( $offset + $size != $endPos ) {
			$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
				'of central directory record' );
		}

		return [ $offset, $size ];
	}

	/**
	 * Read the central directory at the given location.
	 *
	 * Every directory entry is decoded and passed to $this->callback as an
	 * array with the keys "name" (converted to UTF-8 unless the entry is
	 * already flagged as UTF-8), "mtime" (14-digit YYYYMMDDHHMMSS string built
	 * from the MS-DOS date/time fields) and "size" (uncompressed size).
	 *
	 * An error is raised through $this->error() when an entry does not start
	 * with the central directory signature or when the entry is flagged as
	 * using central directory encryption.
	 *
	 * @param int $offset Byte offset of the central directory in the file
	 * @param int $size Length of the central directory in bytes
	 */
	function readCentralDirectory( $offset, $size ) {
		$block = $this->getBlock( $offset, $size );

		// Fixed-size leading part of a central directory file header
		$fixedInfo = [
			'signature' => [ 'string', 4 ],
			'version made by' => 2,
			'version needed' => 2,
			'general bits' => 2,
			'compression method' => 2,
			'mod time' => 2,
			'mod date' => 2,
			'crc-32' => 4,
			'compressed size' => 4,
			'uncompressed size' => 4,
			'name length' => 2,
			'extra field length' => 2,
			'comment length' => 2,
			'disk number start' => 2,
			'internal attrs' => 2,
			'external attrs' => 4,
			'local header offset' => 4,
		];
		$fixedSize = $this->getStructSize( $fixedInfo );

		$pos = 0;
		while ( $pos < $size ) {
			$data = $this->unpack( $block, $fixedInfo, $pos );
			$pos += $fixedSize;

			if ( $data['signature'] !== "PK\x01\x02" ) {
				$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
			}

			// The variable-length part follows; its layout depends on the
			// lengths announced in the fixed part.
			$variableInfo = [
				'name' => [ 'string', $data['name length'] ],
				'extra field' => [ 'string', $data['extra field length'] ],
				'comment' => [ 'string', $data['comment length'] ],
			];
			$data += $this->unpack( $block, $variableInfo, $pos );
			$pos += $this->getStructSize( $variableInfo );

			if ( $this->zip64 && (
					$data['compressed size'] == 0xffffffff
					|| $data['uncompressed size'] == 0xffffffff
					|| $data['local header offset'] == 0xffffffff )
			) {
				$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
				if ( $zip64Data ) {
					// Array union: the ZIP64 values win over the 32-bit ones
					$data = $zip64Data + $data;
				}
			}

			if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
				$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
			}

			// Convert the timestamp into MediaWiki format
			// For the format, please see the MS-DOS 2.0 Programmer's Reference,
			// pages 3-5 and 3-6.
			$time = $data['mod time'];
			$date = $data['mod date'];

			$year = 1980 + ( $date >> 9 );
			$month = ( $date >> 5 ) & 15;
			$day = $date & 31;
			$hour = ( $time >> 11 ) & 31;
			$minute = ( $time >> 5 ) & 63;
			// Seconds are stored in units of two seconds
			$second = ( $time & 31 ) * 2;
			$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
				$year, $month, $day, $hour, $minute, $second );

			// Convert the character set in the file name
			if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
				$name = $data['name'];
			} else {
				$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			}

			// Compile a data array for the user, with a sensible format
			$userData = [
				'name' => $name,
				'mtime' => $timestamp,
				'size' => $data['uncompressed size'],
			];
			call_user_func( $this->callback, $userData );
		}
	}

	/**
	 * Interpret ZIP64 "extra field" data and return an associative array.
	 *
	 * The extra field is walked as a sequence of (id, size, data) records. The
	 * first record whose id equals self::ZIP64_EXTRA_HEADER is decoded and
	 * returned; false is returned when no such record exists.
	 *
	 * NOTE(review): the record is always decoded as all four fields
	 * (28 bytes). The ZIP specification appears to allow writers to omit
	 * fields whose 32-bit counterpart is not saturated; such a record would be
	 * rejected by unpack() here. Confirm whether partial records must be
	 * supported.
	 *
	 * @param string $extraField Raw extra field of a directory entry
	 * @return array|bool Array with the keys "uncompressed size",
	 *  "compressed size", "local header offset" and "disk number start", or
	 *  false if the extra field has no ZIP64 record
	 */
	function unpackZip64Extra( $extraField ) {
		$extraHeaderInfo = [
			'id' => 2,
			'size' => 2,
		];
		$extraHeaderSize = $this->getStructSize( $extraHeaderInfo );

		$zip64ExtraInfo = [
			'uncompressed size' => 8,
			'compressed size' => 8,
			'local header offset' => 8,
			'disk number start' => 4,
		];

		$extraLength = strlen( $extraField );
		$extraPos = 0;
		while ( $extraPos < $extraLength ) {
			$extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
			$extraPos += $extraHeaderSize;
			$extra += $this->unpack( $extraField,
				[ 'data' => [ 'string', $extra['size'] ] ],
				$extraPos );
			$extraPos += $extra['size'];

			if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
				return $this->unpack( $extra['data'], $zip64ExtraInfo );
			}
		}

		return false;
	}

	/**
	 * Get the length of the file.
	 *
	 * The value is taken from fstat() on first use and cached in
	 * $this->fileLength afterwards. If fstat() fails, a "zip-bad" error is
	 * raised through $this->error() instead of caching a null length, which
	 * would only surface later as a misleading out-of-range error in
	 * getBlock().
	 *
	 * @return int
	 */
	function getFileLength() {
		if ( $this->fileLength === null ) {
			$stat = fstat( $this->file );
			if ( $stat === false ) {
				$this->error( 'zip-bad', 'unable to stat the file to determine its length' );
			}
			$this->fileLength = $stat['size'];
		}

		return $this->fileLength;
	}

	/**
	 * Get the file contents from a given offset.
* If there are not enough bytes
	 * in the file to satisfy the request, an exception will be thrown.
	 *
	 * Only the segments that overlap the requested byte range are loaded
	 * through getSegment(). A request for zero bytes returns an empty string
	 * without touching the file.
	 *
	 * @param int $start The byte offset of the start of the block.
	 * @param int $length The number of bytes to return. If omitted, the remainder
	 *  of the file will be returned.
	 *
	 * @return string
	 */
	function getBlock( $start, $length = null ) {
		$fileLength = $this->getFileLength();
		if ( $start >= $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested position $start, " .
				"file length is $fileLength" );
		}
		if ( $length === null ) {
			$length = $fileLength - $start;
		}
		$end = $start + $length;
		if ( $end > $fileLength ) {
			$this->error( 'zip-bad', "getBlock() requested end position $end, " .
				"file length is $fileLength" );
		}
		if ( $length <= 0 ) {
			// Nothing to read; avoid loading a segment just to cut it to ''
			return '';
		}

		// $end is exclusive, so the last byte wanted is $end - 1. Stopping at
		// the segment holding that byte avoids reading and caching one
		// segment beyond the requested range on every call.
		$startSeg = (int)floor( $start / self::SEGSIZE );
		$lastSeg = (int)floor( ( $end - 1 ) / self::SEGSIZE );

		$block = '';
		for ( $segIndex = $startSeg; $segIndex <= $lastSeg; $segIndex++ ) {
			$block .= $this->getSegment( $segIndex );
		}

		// Cut the requested range out of the concatenated segments
		$block = substr( $block,
			$start - $startSeg * self::SEGSIZE,
			$length );

		if ( strlen( $block ) < $length ) {
			$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
		}

		return $block;
	}

	/**
	 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
	 * of length self::SEGSIZE. The result is cached. This is a helper function
	 * for getBlock().
	 *
	 * If there are not enough bytes in the file to satisfy the request, the
	 * return value will be truncated. If a request is made for a segment beyond
	 * the end of the file, an empty string will be returned.
*
	 * @param int $segIndex
	 *
	 * @return string
	 */
	function getSegment( $segIndex ) {
		// Serve from the cache when this segment was already loaded
		if ( isset( $this->buffer[$segIndex] ) ) {
			return $this->buffer[$segIndex];
		}

		$bytePos = $segIndex * self::SEGSIZE;
		if ( $bytePos >= $this->getFileLength() ) {
			// Past the end of the file: remember the empty result as well
			$this->buffer[$segIndex] = '';

			return '';
		}

		if ( fseek( $this->file, $bytePos ) ) {
			$this->error( 'zip-bad', "seek to $bytePos failed" );
		}
		$contents = fread( $this->file, self::SEGSIZE );
		if ( $contents === false ) {
			$this->error( 'zip-bad', "read from $bytePos failed" );
		}
		$this->buffer[$segIndex] = $contents;

		return $contents;
	}

	/**
	 * Compute the total number of bytes occupied by a structure. The format
	 * of $struct is the one accepted by unpack(): each member is either an
	 * integer byte count or an array whose second element is the byte count.
	 *
	 * @param array $struct
	 * @return int
	 */
	function getStructSize( $struct ) {
		$total = 0;
		foreach ( $struct as $member ) {
			$total += is_array( $member ) ? $member[1] : $member;
		}

		return $total;
	}

	/**
	 * Unpack a binary structure. This is like the built-in unpack() function
	 * except nicer.
	 *
	 * @param string $string The binary data input
	 *
	 * @param array $struct An associative array giving structure members and their
	 *    types. The key is the field name. The value may be either an
	 *    integer, in which case the field is a little-endian unsigned integer
	 *    encoded in the given number of bytes, or an array, in which case the
	 *    first element of the array is the type name, and the subsequent
	 *    elements are type-dependent parameters. Only one such type is defined:
	 *       - "string": The second array element gives the length of string.
	 *          Not null terminated.
	 *
	 * @param int $offset The offset into the string at which to start unpacking.
	 *
	 * @throws MWException
	 * @return array Unpacked associative array. Note that large integers in the input
	 *    may be represented as floating point numbers in the return value, so
	 *    the use of weak comparison is advised.
*/
	function unpack( $string, $struct, $offset = 0 ) {
		// Refuse to read beyond the supplied data
		if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
			$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
		}

		$result = [];
		$cursor = $offset;
		foreach ( $struct as $field => $spec ) {
			if ( !is_array( $spec ) ) {
				// Unsigned little-endian integer of $byteCount bytes
				$byteCount = (int)$spec;

				// Accumulate from the most significant byte downwards. PHP
				// switches the accumulator to floating point by itself once
				// the value no longer fits into an integer.
				$number = 0;
				$i = $byteCount;
				while ( $i-- > 0 ) {
					$number = $number * 256 + ord( $string[$cursor + $i] );
				}

				// Throw an exception if there was loss of precision
				if ( $number > pow( 2, 52 ) ) {
					$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
						'This could happen if we tried to unpack a 64-bit structure ' .
						'at an invalid location.' );
				}

				$result[$field] = $number;
				$cursor += $byteCount;
				continue;
			}

			$typeName = $spec[0];
			$fieldSize = $spec[1];
			// Loose comparison on purpose: this mirrors how a switch statement
			// would match the type name.
			if ( $typeName != 'string' ) {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$result[$field] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
		}

		return $result;
	}

	/**
	 * Tell whether a single bit of an integer value is set.
	 *
	 * @param int $value
	 * @param int $bitIndex The index of the bit, where 0 is the LSB.
	 * @return bool True if the bit is 1, false if it is 0
	 */
	function testBit( $value, $bitIndex ) {
		return ( ( $value >> $bitIndex ) & 1 ) === 1;
	}

	/**
	 * Debugging helper function which dumps a string in hexdump -C format.
* @param string $s
	 *
	 * Each output row shows an 8-digit hexadecimal offset, up to 16 bytes as
	 * two-digit hexadecimal numbers (with an extra gap before the ninth) and
	 * the same bytes as text between "|" characters, non-printable bytes
	 * being shown as ".". The output is printed directly.
	 */
	function hexDump( $s ) {
		$n = strlen( $s );
		for ( $i = 0; $i < $n; $i += 16 ) {
			printf( "%08X ", $i );
			for ( $j = 0; $j < 16; $j++ ) {
				print " ";
				if ( $j == 8 ) {
					print " ";
				}
				if ( $i + $j >= $n ) {
					// A byte takes two hex digits, so a missing byte needs two
					// spaces; otherwise the text column of a short final row
					// does not line up with the rows above it.
					print "  ";
				} else {
					printf( "%02X", ord( $s[$i + $j] ) );
				}
			}

			print " |";
			for ( $j = 0; $j < 16; $j++ ) {
				if ( $i + $j >= $n ) {
					print " ";
				} elseif ( ctype_print( $s[$i + $j] ) ) {
					print $s[$i + $j];
				} else {
					print '.';
				}
			}
			print "|\n";
		}
	}
}
diff --git a/www/wiki/includes/utils/ZipDirectoryReaderError.php b/www/wiki/includes/utils/ZipDirectoryReaderError.php
new file mode 100644
index 00000000..592036e3
--- /dev/null
+++ b/www/wiki/includes/utils/ZipDirectoryReaderError.php
@@ -0,0 +1,38 @@
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * Internal exception class. Will be caught by private code.
 *
 * The exception message is "ZipDirectoryReader error: " followed by the error
 * code; the code itself is kept separately and exposed by getErrorCode().
 */
class ZipDirectoryReaderError extends Exception {
	/** @var mixed The error code given to the constructor */
	protected $errorCode;

	/**
	 * @param mixed $code Error code; it is also interpolated into the
	 *  exception message
	 */
	function __construct( $code ) {
		$this->errorCode = $code;
		parent::__construct( "ZipDirectoryReader error: $code" );
	}

	/**
	 * @return mixed The error code given to the constructor
	 */
	function getErrorCode() {
		return $this->errorCode;
	}
}
|